#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_svd_comp implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES pure module subroutine stdlib${ii}$_sgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! SGEBRD reduces a general real M-by-N matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=sp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'SGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+nb-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_slabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update ! of the form a := a - v*y**t - x*u**t call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) call stdlib${ii}$_sgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_sgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_sgebrd pure module subroutine stdlib${ii}$_dgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! DGEBRD reduces a general real M-by-N matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=dp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+nb-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_dlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update ! of the form a := a - v*y**t - x*u**t call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) call stdlib${ii}$_dgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_dgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_dgebrd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! DGEBRD: reduces a general real M-by-N matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'DGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+nb-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_${ri}$labrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+nb:m,i+nb:n), using an update ! of the form a := a - v*y**t - x*u**t call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, a( i+& nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, one,a( i+nb, i+nb ), lda ) call stdlib${ii}$_${ri}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -one, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,one, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_${ri}$gebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_${ri}$gebrd #:endif #:endfor pure module subroutine stdlib${ii}$_cgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! CGEBRD reduces a general complex M-by-N matrix A to upper or lower !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(sp), intent(out) :: d(*), e(*) complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=sp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'CGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+ib-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_clabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+ib:m,i+ib:n), using ! an update of the form a := a - v*y**h - x*u**h call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) call stdlib${ii}$_cgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_cgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_cgebrd pure module subroutine stdlib${ii}$_zgebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! ZGEBRD reduces a general complex M-by-N matrix A to upper or lower !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(dp), intent(out) :: d(*), e(*) complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=dp) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+ib-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_zlabrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+ib:m,i+ib:n), using ! an update of the form a := a - v*y**h - x*u**h call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) call stdlib${ii}$_zgemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_zgebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_zgebrd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gebrd( m, n, a, lda, d, e, tauq, taup, work, lwork,info ) !! ZGEBRD: reduces a general complex M-by-N matrix A to upper or lower !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, lwork, m, n ! Array Arguments real(${ck}$), intent(out) :: d(*), e(*) complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery integer(${ik}$) :: i, iinfo, j, ldwrkx, ldwrky, lwkopt, minmn, nb, nbmin, nx, ws ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ nb = max( 1_${ik}$, stdlib${ii}$_ilaenv( 1_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) lwkopt = ( m+n )*nb work( 1_${ik}$ ) = real( lwkopt,KIND=${ck}$) lquery = ( lwork==-1_${ik}$ ) if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ else if( lwork<max( 1_${ik}$, m, n ) .and. .not.lquery ) then info = -10_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGEBRD', -info ) return else if( lquery ) then return end if ! quick return if possible minmn = min( m, n ) if( minmn==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if ws = max( m, n ) ldwrkx = m ldwrky = n if( nb>1_${ik}$ .and. nb<minmn ) then ! set the crossover point nx. nx = max( nb, stdlib${ii}$_ilaenv( 3_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) ) ! determine when to switch from blocked to unblocked code. if( nx<minmn ) then ws = ( m+n )*nb if( lwork<ws ) then ! not enough work space for the optimal nb, consider using ! a smaller block size. nbmin = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZGEBRD', ' ', m, n, -1_${ik}$, -1_${ik}$ ) if( lwork>=( m+n )*nbmin ) then nb = lwork / ( m+n ) else nb = 1_${ik}$ nx = minmn end if end if end if else nx = minmn end if do i = 1, minmn - nx, nb ! reduce rows and columns i:i+ib-1 to bidiagonal form and return ! the matrices x and y which are needed to update the unreduced ! part of the matrix call stdlib${ii}$_${ci}$labrd( m-i+1, n-i+1, nb, a( i, i ), lda, d( i ), e( i ),tauq( i ), & taup( i ), work, ldwrkx,work( ldwrkx*nb+1 ), ldwrky ) ! update the trailing submatrix a(i+ib:m,i+ib:n), using ! an update of the form a := a - v*y**h - x*u**h call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'CONJUGATE TRANSPOSE', m-i-nb+1,n-i-nb+1, nb, -& cone, a( i+nb, i ), lda,work( ldwrkx*nb+nb+1 ), ldwrky, cone,a( i+nb, i+nb ), lda ) call stdlib${ii}$_${ci}$gemm( 'NO TRANSPOSE', 'NO TRANSPOSE', m-i-nb+1, n-i-nb+1,nb, -cone, & work( nb+1 ), ldwrkx, a( i, i+nb ), lda,cone, a( i+nb, i+nb ), lda ) ! copy diagonal and off-diagonal elements of b back into a if( m>=n ) then do j = i, i + nb - 1 a( j, j ) = d( j ) a( j, j+1 ) = e( j ) end do else do j = i, i + nb - 1 a( j, j ) = d( j ) a( j+1, j ) = e( j ) end do end if end do ! use unblocked code to reduce the remainder of the matrix call stdlib${ii}$_${ci}$gebd2( m-i+1, n-i+1, a( i, i ), lda, d( i ), e( i ),tauq( i ), taup( i ), & work, iinfo ) work( 1_${ik}$ ) = ws return end subroutine stdlib${ii}$_${ci}$gebrd #:endif #:endfor pure module subroutine stdlib${ii}$_sgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! SGEBD2 reduces a real general m by n matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) call stdlib${ii}$_slarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply h(i) to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_slarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+& 1_${ik}$ ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_slarfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) ) e( i ) = a( i, i+1 ) a( i, i+1 ) = one ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_slarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) a( i, i+1 ) = e( i ) else taup( i ) = zero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_slarfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_slarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) call stdlib${ii}$_slarfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = a( i+1, i ) a( i+1, i ) = one ! apply h(i) to a(i+1:m,i+1:n) from the left call stdlib${ii}$_slarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), & lda, work ) a( i+1, i ) = e( i ) else tauq( i ) = zero end if end do end if return end subroutine stdlib${ii}$_sgebd2 pure module subroutine stdlib${ii}$_dgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! DGEBD2 reduces a real general m by n matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) call stdlib${ii}$_dlarfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply h(i) to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_dlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+& 1_${ik}$ ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_dlarfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) ) e( i ) = a( i, i+1 ) a( i, i+1 ) = one ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_dlarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) a( i, i+1 ) = e( i ) else taup( i ) = zero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_dlarfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_dlarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) call stdlib${ii}$_dlarfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = a( i+1, i ) a( i+1, i ) = one ! apply h(i) to a(i+1:m,i+1:n) from the left call stdlib${ii}$_dlarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), & lda, work ) a( i+1, i ) = e( i ) else tauq( i ) = zero end if end do end if return end subroutine stdlib${ii}$_dgebd2 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! DGEBD2: reduces a real general m by n matrix A to upper or lower !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(out) :: d(*), e(*), taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) call stdlib${ii}$_${ri}$larfg( m-i+1, a( i, i ), a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply h(i) to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_${ri}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$, tauq( i ),a( i, i+& 1_${ik}$ ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_${ri}$larfg( n-i, a( i, i+1 ), a( i, min( i+2, n ) ),lda, taup( i ) ) e( i ) = a( i, i+1 ) a( i, i+1 ) = one ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_${ri}$larf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) a( i, i+1 ) = e( i ) else taup( i ) = zero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_${ri}$larfg( n-i+1, a( i, i ), a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = a( i, i ) a( i, i ) = one ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_${ri}$larf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) call stdlib${ii}$_${ri}$larfg( m-i, a( i+1, i ), a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = a( i+1, i ) a( i+1, i ) = one ! apply h(i) to a(i+1:m,i+1:n) from the left call stdlib${ii}$_${ri}$larf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$, tauq( i ),a( i+1, i+1 ), & lda, work ) a( i+1, i ) = e( i ) else tauq( i ) = zero end if end do end if return end subroutine stdlib${ii}$_${ri}$gebd2 #:endif #:endfor pure module subroutine stdlib${ii}$_cgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! CGEBD2 reduces a complex general m by n matrix A to upper or lower !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(sp), intent(out) :: d(*), e(*) complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i complex(sp) :: alpha ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) alpha = a( i, i ) call stdlib${ii}$_clarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = real( alpha,KIND=sp) a( i, i ) = cone ! apply h(i)**h to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_clarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), & a( i, i+1 ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda ) alpha = a( i, i+1 ) call stdlib${ii}$_clarfg( n-i, alpha, a( i, min( i+2, n ) ),lda, taup( i ) ) e( i ) = real( alpha,KIND=sp) a( i, i+1 ) = cone ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_clarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) call stdlib${ii}$_clacgv( n-i, a( i, i+1 ), lda ) a( i, i+1 ) = e( i ) else taup( i ) = czero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_clacgv( n-i+1, a( i, i ), lda ) alpha = a( i, i ) call stdlib${ii}$_clarfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = real( alpha,KIND=sp) a( i, i ) = cone ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_clarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) call stdlib${ii}$_clacgv( n-i+1, a( i, i ), lda ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) alpha = a( i+1, i ) call stdlib${ii}$_clarfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = real( alpha,KIND=sp) a( i+1, i ) = cone ! apply h(i)**h to a(i+1:m,i+1:n) from the left call stdlib${ii}$_clarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+& 1_${ik}$, i+1 ), lda,work ) a( i+1, i ) = e( i ) else tauq( i ) = czero end if end do end if return end subroutine stdlib${ii}$_cgebd2 pure module subroutine stdlib${ii}$_zgebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! ZGEBD2 reduces a complex general m by n matrix A to upper or lower !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(dp), intent(out) :: d(*), e(*) complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i complex(dp) :: alpha ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) alpha = a( i, i ) call stdlib${ii}$_zlarfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = real( alpha,KIND=dp) a( i, i ) = cone ! apply h(i)**h to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_zlarf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), & a( i, i+1 ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda ) alpha = a( i, i+1 ) call stdlib${ii}$_zlarfg( n-i, alpha, a( i, min( i+2, n ) ), lda,taup( i ) ) e( i ) = real( alpha,KIND=dp) a( i, i+1 ) = cone ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_zlarf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) call stdlib${ii}$_zlacgv( n-i, a( i, i+1 ), lda ) a( i, i+1 ) = e( i ) else taup( i ) = czero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_zlacgv( n-i+1, a( i, i ), lda ) alpha = a( i, i ) call stdlib${ii}$_zlarfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = real( alpha,KIND=dp) a( i, i ) = cone ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_zlarf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) call stdlib${ii}$_zlacgv( n-i+1, a( i, i ), lda ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) alpha = a( i+1, i ) call stdlib${ii}$_zlarfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = real( alpha,KIND=dp) a( i+1, i ) = cone ! apply h(i)**h to a(i+1:m,i+1:n) from the left call stdlib${ii}$_zlarf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+& 1_${ik}$, i+1 ), lda,work ) a( i+1, i ) = e( i ) else tauq( i ) = czero end if end do end if return end subroutine stdlib${ii}$_zgebd2 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gebd2( m, n, a, lda, d, e, tauq, taup, work, info ) !! ZGEBD2: reduces a complex general m by n matrix A to upper or lower !! real bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, m, n ! Array Arguments real(${ck}$), intent(out) :: d(*), e(*) complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(out) :: taup(*), tauq(*), work(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: i complex(${ck}$) :: alpha ! Intrinsic Functions ! Executable Statements ! test the input parameters info = 0_${ik}$ if( m<0_${ik}$ ) then info = -1_${ik}$ else if( n<0_${ik}$ ) then info = -2_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -4_${ik}$ end if if( info<0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGEBD2', -info ) return end if if( m>=n ) then ! reduce to upper bidiagonal form do i = 1, n ! generate elementary reflector h(i) to annihilate a(i+1:m,i) alpha = a( i, i ) call stdlib${ii}$_${ci}$larfg( m-i+1, alpha, a( min( i+1, m ), i ), 1_${ik}$,tauq( i ) ) d( i ) = real( alpha,KIND=${ck}$) a( i, i ) = cone ! apply h(i)**h to a(i:m,i+1:n) from the left if( i<n )call stdlib${ii}$_${ci}$larf( 'LEFT', m-i+1, n-i, a( i, i ), 1_${ik}$,conjg( tauq( i ) ), & a( i, i+1 ), lda, work ) a( i, i ) = d( i ) if( i<n ) then ! generate elementary reflector g(i) to annihilate ! a(i,i+2:n) call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda ) alpha = a( i, i+1 ) call stdlib${ii}$_${ci}$larfg( n-i, alpha, a( i, min( i+2, n ) ), lda,taup( i ) ) e( i ) = real( alpha,KIND=${ck}$) a( i, i+1 ) = cone ! apply g(i) to a(i+1:m,i+1:n) from the right call stdlib${ii}$_${ci}$larf( 'RIGHT', m-i, n-i, a( i, i+1 ), lda,taup( i ), a( i+1, i+1 & ), lda, work ) call stdlib${ii}$_${ci}$lacgv( n-i, a( i, i+1 ), lda ) a( i, i+1 ) = e( i ) else taup( i ) = czero end if end do else ! reduce to lower bidiagonal form do i = 1, m ! generate elementary reflector g(i) to annihilate a(i,i+1:n) call stdlib${ii}$_${ci}$lacgv( n-i+1, a( i, i ), lda ) alpha = a( i, i ) call stdlib${ii}$_${ci}$larfg( n-i+1, alpha, a( i, min( i+1, n ) ), lda,taup( i ) ) d( i ) = real( alpha,KIND=${ck}$) a( i, i ) = cone ! apply g(i) to a(i+1:m,i:n) from the right if( i<m )call stdlib${ii}$_${ci}$larf( 'RIGHT', m-i, n-i+1, a( i, i ), lda,taup( i ), a( i+& 1_${ik}$, i ), lda, work ) call stdlib${ii}$_${ci}$lacgv( n-i+1, a( i, i ), lda ) a( i, i ) = d( i ) if( i<m ) then ! generate elementary reflector h(i) to annihilate ! a(i+2:m,i) alpha = a( i+1, i ) call stdlib${ii}$_${ci}$larfg( m-i, alpha, a( min( i+2, m ), i ), 1_${ik}$,tauq( i ) ) e( i ) = real( alpha,KIND=${ck}$) a( i+1, i ) = cone ! apply h(i)**h to a(i+1:m,i+1:n) from the left call stdlib${ii}$_${ci}$larf( 'LEFT', m-i, n-i, a( i+1, i ), 1_${ik}$,conjg( tauq( i ) ), a( i+& 1_${ik}$, i+1 ), lda,work ) a( i+1, i ) = e( i ) else tauq( i ) = czero end if end do end if return end subroutine stdlib${ii}$_${ci}$gebd2 #:endif #:endfor pure module subroutine stdlib${ii}$_sgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! SGBBRD reduces a real general m-by-n band matrix A to upper !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! The routine computes B, and optionally forms Q or P**T, or computes !! Q**T*C for a given matrix C. ldc, work, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(sp), intent(inout) :: ab(ldab,*), c(ldc,*) real(sp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& mu, mu0, nr, nrt real(sp) :: ra, rb, rc, rs ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGBBRD', -info ) return end if ! initialize q and p**t to the unit matrix, if needed if( wantq )call stdlib${ii}$_slaset( 'FULL', m, m, zero, one, q, ldq ) if( wantpt )call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the sines of the plane rotations are stored in work(1:max(m,n)) ! and the cosines in work(max(m,n)+1:2*max(m,n)). mn = max( m, n ) klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_slargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & work( mn+j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_slartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & work( i+ml-1 ),ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_srot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_srot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j & ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_srot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_slargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& work( mn+j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_slartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_slartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & mn+i+mu-1 ), work( i+mu-1 ),ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_srot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**t do j = j1, j2, kb1 call stdlib${ii}$_srot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( & mn+j+kun ),work( j+kun ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, storing diagonal elements in d ! and off-diagonal elements in e do i = 1, min( m-1, n ) call stdlib${ii}$_slartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) d( i ) = ra if( i<n ) then e( i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_srot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs ) if( wantc )call stdlib${ii}$_srot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do if( m<=n )d( m ) = ab( 1_${ik}$, m ) else if( ku>0_${ik}$ ) then ! a has been reduced to upper bidiagonal form if( m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right, storing diagonal elements in d and off-diagonal ! elements in e rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_slartg( ab( ku+1, i ), rb, rc, rs, ra ) d( i ) = ra if( i>1_${ik}$ ) then rb = -rs*ab( ku, i ) e( i-1 ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_srot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs ) end do else ! copy off-diagonal elements to e and diagonal elements to d do i = 1, minmn - 1 e( i ) = ab( ku, i+1 ) end do do i = 1, minmn d( i ) = ab( ku+1, i ) end do end if else ! a is diagonal. set elements of e to zero and copy diagonal ! elements to d. do i = 1, minmn - 1 e( i ) = zero end do do i = 1, minmn d( i ) = ab( 1_${ik}$, i ) end do end if return end subroutine stdlib${ii}$_sgbbrd pure module subroutine stdlib${ii}$_dgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! DGBBRD reduces a real general m-by-n band matrix A to upper !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! The routine computes B, and optionally forms Q or P**T, or computes !! Q**T*C for a given matrix C. ldc, work, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(dp), intent(inout) :: ab(ldab,*), c(ldc,*) real(dp), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& mu, mu0, nr, nrt real(dp) :: ra, rb, rc, rs ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGBBRD', -info ) return end if ! initialize q and p**t to the unit matrix, if needed if( wantq )call stdlib${ii}$_dlaset( 'FULL', m, m, zero, one, q, ldq ) if( wantpt )call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the sines of the plane rotations are stored in work(1:max(m,n)) ! and the cosines in work(max(m,n)+1:2*max(m,n)). mn = max( m, n ) klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_dlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & work( mn+j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_dlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & work( i+ml-1 ),ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_drot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_drot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j & ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_drot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_dlargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& work( mn+j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_dlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_dlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & mn+i+mu-1 ), work( i+mu-1 ),ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_drot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**t do j = j1, j2, kb1 call stdlib${ii}$_drot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( & mn+j+kun ),work( j+kun ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, storing diagonal elements in d ! and off-diagonal elements in e do i = 1, min( m-1, n ) call stdlib${ii}$_dlartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) d( i ) = ra if( i<n ) then e( i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_drot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs ) if( wantc )call stdlib${ii}$_drot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do if( m<=n )d( m ) = ab( 1_${ik}$, m ) else if( ku>0_${ik}$ ) then ! a has been reduced to upper bidiagonal form if( m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right, storing diagonal elements in d and off-diagonal ! elements in e rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_dlartg( ab( ku+1, i ), rb, rc, rs, ra ) d( i ) = ra if( i>1_${ik}$ ) then rb = -rs*ab( ku, i ) e( i-1 ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_drot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs ) end do else ! copy off-diagonal elements to e and diagonal elements to d do i = 1, minmn - 1 e( i ) = ab( ku, i+1 ) end do do i = 1, minmn d( i ) = ab( ku+1, i ) end do end if else ! a is diagonal. set elements of e to zero and copy diagonal ! elements to d. do i = 1, minmn - 1 e( i ) = zero end do do i = 1, minmn d( i ) = ab( 1_${ik}$, i ) end do end if return end subroutine stdlib${ii}$_dgbbrd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! DGBBRD: reduces a real general m-by-n band matrix A to upper !! bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. !! The routine computes B, and optionally forms Q or P**T, or computes !! Q**T*C for a given matrix C. ldc, work, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(${rk}$), intent(inout) :: ab(ldab,*), c(ldc,*) real(${rk}$), intent(out) :: d(*), e(*), pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mn,& mu, mu0, nr, nrt real(${rk}$) :: ra, rb, rc, rs ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGBBRD', -info ) return end if ! initialize q and p**t to the unit matrix, if needed if( wantq )call stdlib${ii}$_${ri}$laset( 'FULL', m, m, zero, one, q, ldq ) if( wantpt )call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the sines of the plane rotations are stored in work(1:max(m,n)) ! and the cosines in work(max(m,n)+1:2*max(m,n)). mn = max( m, n ) klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & work( mn+j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,work( mn+j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_${ri}$lartg( ab( ku+ml-1, i ), ab( ku+ml, i ),work( mn+i+ml-1 ), & work( i+ml-1 ),ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_${ri}$rot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,work( mn+i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_${ri}$rot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,work( mn+j ), work( j & ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_${ri}$rot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,work( mn+j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = work( mn+j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_${ri}$largv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& work( mn+j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_${ri}$lartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,work( mn+j1+kun ), work( j1+kun ),kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_${ri}$lartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),work( & mn+i+mu-1 ), work( i+mu-1 ),ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_${ri}$rot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,work( mn+i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**t do j = j1, j2, kb1 call stdlib${ii}$_${ri}$rot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, work( & mn+j+kun ),work( j+kun ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = work( mn+j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, storing diagonal elements in d ! and off-diagonal elements in e do i = 1, min( m-1, n ) call stdlib${ii}$_${ri}$lartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) d( i ) = ra if( i<n ) then e( i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_${ri}$rot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc, rs ) if( wantc )call stdlib${ii}$_${ri}$rot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do if( m<=n )d( m ) = ab( 1_${ik}$, m ) else if( ku>0_${ik}$ ) then ! a has been reduced to upper bidiagonal form if( m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right, storing diagonal elements in d and off-diagonal ! elements in e rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_${ri}$lartg( ab( ku+1, i ), rb, rc, rs, ra ) d( i ) = ra if( i>1_${ik}$ ) then rb = -rs*ab( ku, i ) e( i-1 ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_${ri}$rot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, rs ) end do else ! copy off-diagonal elements to e and diagonal elements to d do i = 1, minmn - 1 e( i ) = ab( ku, i+1 ) end do do i = 1, minmn d( i ) = ab( ku+1, i ) end do end if else ! a is diagonal. set elements of e to zero and copy diagonal ! elements to d. do i = 1, minmn - 1 e( i ) = zero end do do i = 1, minmn d( i ) = ab( 1_${ik}$, i ) end do end if return end subroutine stdlib${ii}$_${ri}$gbbrd #:endif #:endfor pure module subroutine stdlib${ii}$_cgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! CGBBRD reduces a complex general m-by-n band matrix A to real upper !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! The routine computes B, and optionally forms Q or P**H, or computes !! Q**H*C for a given matrix C. ldc, work, rwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(sp), intent(out) :: d(*), e(*), rwork(*) complex(sp), intent(inout) :: ab(ldab,*), c(ldc,*) complex(sp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& mu0, nr, nrt real(sp) :: abst, rc complex(sp) :: ra, rb, rs, t ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGBBRD', -info ) return end if ! initialize q and p**h to the unit matrix, if needed if( wantq )call stdlib${ii}$_claset( 'FULL', m, m, czero, cone, q, ldq ) if( wantpt )call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the complex sines of the plane rotations are stored in work, ! and the real cosines in rwork. klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_clargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & rwork( j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_clartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & work( i+ml-1 ), ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_crot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_crot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( & work( j ) ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_crot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_clargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& rwork( j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_clartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_clartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & i+mu-1 ), work( i+mu-1 ), ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_crot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**h do j = j1, j2, kb1 call stdlib${ii}$_crot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(& j+kun ),conjg( work( j+kun ) ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to complex lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, overwriting superdiagonal ! elements on subdiagonal elements do i = 1, min( m-1, n ) call stdlib${ii}$_clartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) ab( 1_${ik}$, i ) = ra if( i<n ) then ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_crot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) ) if( wantc )call stdlib${ii}$_crot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do else ! a has been reduced to complex upper bidiagonal form or is ! diagonal if( ku>0_${ik}$ .and. m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_clartg( ab( ku+1, i ), rb, rc, rs, ra ) ab( ku+1, i ) = ra if( i>1_${ik}$ ) then rb = -conjg( rs )*ab( ku, i ) ab( ku, i ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_crot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, & conjg( rs ) ) end do end if end if ! make diagonal and superdiagonal elements real, storing them in d ! and e t = ab( ku+1, 1_${ik}$ ) loop_120: do i = 1, minmn abst = abs( t ) d( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantq )call stdlib${ii}$_cscal( m, t, q( 1_${ik}$, i ), 1_${ik}$ ) if( wantc )call stdlib${ii}$_cscal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc ) if( i<minmn ) then if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then e( i ) = zero t = ab( 1_${ik}$, i+1 ) else if( ku==0_${ik}$ ) then t = ab( 2_${ik}$, i )*conjg( t ) else t = ab( ku, i+1 )*conjg( t ) end if abst = abs( t ) e( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantpt )call stdlib${ii}$_cscal( n, t, pt( i+1, 1_${ik}$ ), ldpt ) t = ab( ku+1, i+1 )*conjg( t ) end if end if end do loop_120 return end subroutine stdlib${ii}$_cgbbrd pure module subroutine stdlib${ii}$_zgbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! ZGBBRD reduces a complex general m-by-n band matrix A to real upper !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! The routine computes B, and optionally forms Q or P**H, or computes !! Q**H*C for a given matrix C. ldc, work, rwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(dp), intent(out) :: d(*), e(*), rwork(*) complex(dp), intent(inout) :: ab(ldab,*), c(ldc,*) complex(dp), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& mu0, nr, nrt real(dp) :: abst, rc complex(dp) :: ra, rb, rs, t ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGBBRD', -info ) return end if ! initialize q and p**h to the unit matrix, if needed if( wantq )call stdlib${ii}$_zlaset( 'FULL', m, m, czero, cone, q, ldq ) if( wantpt )call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the complex sines of the plane rotations are stored in work, ! and the real cosines in rwork. klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_zlargv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & rwork( j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_zlartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & work( i+ml-1 ), ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_zrot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_zrot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( & work( j ) ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_zrot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_zlargv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& rwork( j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_zlartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_zlartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & i+mu-1 ), work( i+mu-1 ), ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_zrot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**h do j = j1, j2, kb1 call stdlib${ii}$_zrot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(& j+kun ),conjg( work( j+kun ) ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to complex lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, overwriting superdiagonal ! elements on subdiagonal elements do i = 1, min( m-1, n ) call stdlib${ii}$_zlartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) ab( 1_${ik}$, i ) = ra if( i<n ) then ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_zrot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) ) if( wantc )call stdlib${ii}$_zrot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do else ! a has been reduced to complex upper bidiagonal form or is ! diagonal if( ku>0_${ik}$ .and. m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_zlartg( ab( ku+1, i ), rb, rc, rs, ra ) ab( ku+1, i ) = ra if( i>1_${ik}$ ) then rb = -conjg( rs )*ab( ku, i ) ab( ku, i ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_zrot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, & conjg( rs ) ) end do end if end if ! make diagonal and superdiagonal elements real, storing them in d ! and e t = ab( ku+1, 1_${ik}$ ) loop_120: do i = 1, minmn abst = abs( t ) d( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantq )call stdlib${ii}$_zscal( m, t, q( 1_${ik}$, i ), 1_${ik}$ ) if( wantc )call stdlib${ii}$_zscal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc ) if( i<minmn ) then if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then e( i ) = zero t = ab( 1_${ik}$, i+1 ) else if( ku==0_${ik}$ ) then t = ab( 2_${ik}$, i )*conjg( t ) else t = ab( ku, i+1 )*conjg( t ) end if abst = abs( t ) e( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantpt )call stdlib${ii}$_zscal( n, t, pt( i+1, 1_${ik}$ ), ldpt ) t = ab( ku+1, i+1 )*conjg( t ) end if end if end do loop_120 return end subroutine stdlib${ii}$_zgbbrd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gbbrd( vect, m, n, ncc, kl, ku, ab, ldab, d, e, q,ldq, pt, ldpt, c, & !! ZGBBRD: reduces a complex general m-by-n band matrix A to real upper !! bidiagonal form B by a unitary transformation: Q**H * A * P = B. !! The routine computes B, and optionally forms Q or P**H, or computes !! Q**H*C for a given matrix C. ldc, work, rwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: kl, ku, ldab, ldc, ldpt, ldq, m, n, ncc ! Array Arguments real(${ck}$), intent(out) :: d(*), e(*), rwork(*) complex(${ck}$), intent(inout) :: ab(ldab,*), c(ldc,*) complex(${ck}$), intent(out) :: pt(ldpt,*), q(ldq,*), work(*) ! ===================================================================== ! Local Scalars logical(lk) :: wantb, wantc, wantpt, wantq integer(${ik}$) :: i, inca, j, j1, j2, kb, kb1, kk, klm, klu1, kun, l, minmn, ml, ml0, mu,& mu0, nr, nrt real(${ck}$) :: abst, rc complex(${ck}$) :: ra, rb, rs, t ! Intrinsic Functions ! Executable Statements ! test the input parameters wantb = stdlib_lsame( vect, 'B' ) wantq = stdlib_lsame( vect, 'Q' ) .or. wantb wantpt = stdlib_lsame( vect, 'P' ) .or. wantb wantc = ncc>0_${ik}$ klu1 = kl + ku + 1_${ik}$ info = 0_${ik}$ if( .not.wantq .and. .not.wantpt .and. .not.stdlib_lsame( vect, 'N' ) )then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ ) then info = -3_${ik}$ else if( ncc<0_${ik}$ ) then info = -4_${ik}$ else if( kl<0_${ik}$ ) then info = -5_${ik}$ else if( ku<0_${ik}$ ) then info = -6_${ik}$ else if( ldab<klu1 ) then info = -8_${ik}$ else if( ldq<1_${ik}$ .or. wantq .and. ldq<max( 1_${ik}$, m ) ) then info = -12_${ik}$ else if( ldpt<1_${ik}$ .or. wantpt .and. ldpt<max( 1_${ik}$, n ) ) then info = -14_${ik}$ else if( ldc<1_${ik}$ .or. wantc .and. ldc<max( 1_${ik}$, m ) ) then info = -16_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGBBRD', -info ) return end if ! initialize q and p**h to the unit matrix, if needed if( wantq )call stdlib${ii}$_${ci}$laset( 'FULL', m, m, czero, cone, q, ldq ) if( wantpt )call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, pt, ldpt ) ! quick return if possible. if( m==0 .or. n==0 )return minmn = min( m, n ) if( kl+ku>1_${ik}$ ) then ! reduce to upper bidiagonal form if ku > 0; if ku = 0, reduce ! first to lower bidiagonal form and then transform to upper ! bidiagonal if( ku>0_${ik}$ ) then ml0 = 1_${ik}$ mu0 = 2_${ik}$ else ml0 = 2_${ik}$ mu0 = 1_${ik}$ end if ! wherever possible, plane rotations are generated and applied in ! vector operations of length nr over the index set j1:j2:klu1. ! the complex sines of the plane rotations are stored in work, ! and the real cosines in rwork. klm = min( m-1, kl ) kun = min( n-1, ku ) kb = klm + kun kb1 = kb + 1_${ik}$ inca = kb1*ldab nr = 0_${ik}$ j1 = klm + 2_${ik}$ j2 = 1_${ik}$ - kun loop_90: do i = 1, minmn ! reduce i-th column and i-th row of matrix to bidiagonal form ml = klm + 1_${ik}$ mu = kun + 1_${ik}$ loop_80: do kk = 1, kb j1 = j1 + kb j2 = j2 + kb ! generate plane rotations to annihilate nonzero elements ! which have been created below the band if( nr>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nr, ab( klu1, j1-klm-1 ), inca,work( j1 ), kb1, & rwork( j1 ), kb1 ) ! apply plane rotations from the left do l = 1, kb if( j2-klm+l-1>n ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( klu1-l, j1-klm+l-1 ), inca,ab( & klu1-l+1, j1-klm+l-1 ), inca,rwork( j1 ), work( j1 ), kb1 ) end do if( ml>ml0 ) then if( ml<=m-i+1 ) then ! generate plane rotation to annihilate a(i+ml-1,i) ! within the band, and apply rotation from the left call stdlib${ii}$_${ci}$lartg( ab( ku+ml-1, i ), ab( ku+ml, i ),rwork( i+ml-1 ), & work( i+ml-1 ), ra ) ab( ku+ml-1, i ) = ra if( i<n )call stdlib${ii}$_${ci}$rot( min( ku+ml-2, n-i ),ab( ku+ml-2, i+1 ), ldab-& 1_${ik}$,ab( ku+ml-1, i+1 ), ldab-1,rwork( i+ml-1 ), work( i+ml-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantq ) then ! accumulate product of plane rotations in q do j = j1, j2, kb1 call stdlib${ii}$_${ci}$rot( m, q( 1_${ik}$, j-1 ), 1_${ik}$, q( 1_${ik}$, j ), 1_${ik}$,rwork( j ), conjg( & work( j ) ) ) end do end if if( wantc ) then ! apply plane rotations to c do j = j1, j2, kb1 call stdlib${ii}$_${ci}$rot( ncc, c( j-1, 1_${ik}$ ), ldc, c( j, 1_${ik}$ ), ldc,rwork( j ), & work( j ) ) end do end if if( j2+kun>n ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j-1,j+ku) above the band ! and store it in work(n+1:2*n) work( j+kun ) = work( j )*ab( 1_${ik}$, j+kun ) ab( 1_${ik}$, j+kun ) = rwork( j )*ab( 1_${ik}$, j+kun ) end do ! generate plane rotations to annihilate nonzero elements ! which have been generated above the band if( nr>0_${ik}$ )call stdlib${ii}$_${ci}$largv( nr, ab( 1_${ik}$, j1+kun-1 ), inca,work( j1+kun ), kb1,& rwork( j1+kun ),kb1 ) ! apply plane rotations from the right do l = 1, kb if( j2+l-1>m ) then nrt = nr - 1_${ik}$ else nrt = nr end if if( nrt>0_${ik}$ )call stdlib${ii}$_${ci}$lartv( nrt, ab( l+1, j1+kun-1 ), inca,ab( l, j1+& kun ), inca,rwork( j1+kun ), work( j1+kun ), kb1 ) end do if( ml==ml0 .and. mu>mu0 ) then if( mu<=n-i+1 ) then ! generate plane rotation to annihilate a(i,i+mu-1) ! within the band, and apply rotation from the right call stdlib${ii}$_${ci}$lartg( ab( ku-mu+3, i+mu-2 ),ab( ku-mu+2, i+mu-1 ),rwork( & i+mu-1 ), work( i+mu-1 ), ra ) ab( ku-mu+3, i+mu-2 ) = ra call stdlib${ii}$_${ci}$rot( min( kl+mu-2, m-i ),ab( ku-mu+4, i+mu-2 ), 1_${ik}$,ab( ku-& mu+3, i+mu-1 ), 1_${ik}$,rwork( i+mu-1 ), work( i+mu-1 ) ) end if nr = nr + 1_${ik}$ j1 = j1 - kb1 end if if( wantpt ) then ! accumulate product of plane rotations in p**h do j = j1, j2, kb1 call stdlib${ii}$_${ci}$rot( n, pt( j+kun-1, 1_${ik}$ ), ldpt,pt( j+kun, 1_${ik}$ ), ldpt, rwork(& j+kun ),conjg( work( j+kun ) ) ) end do end if if( j2+kb>m ) then ! adjust j2 to keep within the bounds of the matrix nr = nr - 1_${ik}$ j2 = j2 - kb1 end if do j = j1, j2, kb1 ! create nonzero element a(j+kl+ku,j+ku-1) below the ! band and store it in work(1:n) work( j+kb ) = work( j+kun )*ab( klu1, j+kun ) ab( klu1, j+kun ) = rwork( j+kun )*ab( klu1, j+kun ) end do if( ml>ml0 ) then ml = ml - 1_${ik}$ else mu = mu - 1_${ik}$ end if end do loop_80 end do loop_90 end if if( ku==0_${ik}$ .and. kl>0_${ik}$ ) then ! a has been reduced to complex lower bidiagonal form ! transform lower bidiagonal form to upper bidiagonal by applying ! plane rotations from the left, overwriting superdiagonal ! elements on subdiagonal elements do i = 1, min( m-1, n ) call stdlib${ii}$_${ci}$lartg( ab( 1_${ik}$, i ), ab( 2_${ik}$, i ), rc, rs, ra ) ab( 1_${ik}$, i ) = ra if( i<n ) then ab( 2_${ik}$, i ) = rs*ab( 1_${ik}$, i+1 ) ab( 1_${ik}$, i+1 ) = rc*ab( 1_${ik}$, i+1 ) end if if( wantq )call stdlib${ii}$_${ci}$rot( m, q( 1_${ik}$, i ), 1_${ik}$, q( 1_${ik}$, i+1 ), 1_${ik}$, rc,conjg( rs ) ) if( wantc )call stdlib${ii}$_${ci}$rot( ncc, c( i, 1_${ik}$ ), ldc, c( i+1, 1_${ik}$ ), ldc, rc,rs ) end do else ! a has been reduced to complex upper bidiagonal form or is ! diagonal if( ku>0_${ik}$ .and. m<n ) then ! annihilate a(m,m+1) by applying plane rotations from the ! right rb = ab( ku, m+1 ) do i = m, 1, -1 call stdlib${ii}$_${ci}$lartg( ab( ku+1, i ), rb, rc, rs, ra ) ab( ku+1, i ) = ra if( i>1_${ik}$ ) then rb = -conjg( rs )*ab( ku, i ) ab( ku, i ) = rc*ab( ku, i ) end if if( wantpt )call stdlib${ii}$_${ci}$rot( n, pt( i, 1_${ik}$ ), ldpt, pt( m+1, 1_${ik}$ ), ldpt,rc, & conjg( rs ) ) end do end if end if ! make diagonal and superdiagonal elements real, storing them in d ! and e t = ab( ku+1, 1_${ik}$ ) loop_120: do i = 1, minmn abst = abs( t ) d( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantq )call stdlib${ii}$_${ci}$scal( m, t, q( 1_${ik}$, i ), 1_${ik}$ ) if( wantc )call stdlib${ii}$_${ci}$scal( ncc, conjg( t ), c( i, 1_${ik}$ ), ldc ) if( i<minmn ) then if( ku==0_${ik}$ .and. kl==0_${ik}$ ) then e( i ) = zero t = ab( 1_${ik}$, i+1 ) else if( ku==0_${ik}$ ) then t = ab( 2_${ik}$, i )*conjg( t ) else t = ab( ku, i+1 )*conjg( t ) end if abst = abs( t ) e( i ) = abst if( abst/=zero ) then t = t / abst else t = cone end if if( wantpt )call stdlib${ii}$_${ci}$scal( n, t, pt( i+1, 1_${ik}$ ), ldpt ) t = ab( ku+1, i+1 )*conjg( t ) end if end if end do loop_120 return end subroutine stdlib${ii}$_${ci}$gbbrd #:endif #:endfor pure module subroutine stdlib${ii}$_sgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! SGSVJ0 is called from SGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as SGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(sp), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments real(sp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) real(sp), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(sp) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! .. row-cyclic jacobi svd algorithm with column pivoting .. emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! .. row-cyclic pivot strategy with de rijk's pivoting .. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_sgesvj. for sweeps i=1:swband the procedure ! ...... kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. swband = 0_${ik}$ pskipped = 0_${ik}$ loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! some blas implementations compute stdlib${ii}$_snrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_sdot(m,a(1,p),1,a(1,p),1)), which may result in ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_snrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_snrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_snrm2( m, a(1,p), 1 ) * d(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else temp1 = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*d( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq ) / aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( d( q )>=one ) then call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( d( p )>=d( q ) ) then call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, & ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! Safe Gram Matrix Computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq ) / aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=sp)*tol ) .and.( real( n,KIND=sp)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of iterations. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given tolerance, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d. do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_sgsvj0 pure module subroutine stdlib${ii}$_dgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! DGSVJ0 is called from DGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(dp), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments real(dp), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) real(dp), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(dp) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! -#- row-cyclic jacobi svd algorithm with column pivoting -#- emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! -#- row-cyclic pivot strategy with de rijk's pivoting -#- swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_sgesvj. for sweeps i=1:swband the procedure ! ...... kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. swband = 0_${ik}$ pskipped = 0_${ik}$ loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! some blas implementations compute stdlib${ii}$_dnrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_ddot(m,a(1,p),1,a(1,p),1)), which may result in ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_dnrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_dnrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_dnrm2( m, a(1,p), 1 ) * d(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else temp1 = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*d( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( d( q )>=one ) then call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( d( p )>=d( q ) ) then call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, & ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! -#- m x 2 jacobi svd -#- ! -#- safe gram matrix computation -#- if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=dp)*tol ) .and.( real( n,KIND=dp)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of iterations. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given tolerance, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d. do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_dgsvj0 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! DGSVJ0: is called from DGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(${rk}$), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), sva(n), d(n), v(ldv,*) real(${rk}$), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(${rk}$) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(${rk}$) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! -#- row-cyclic jacobi svd algorithm with column pivoting -#- emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! -#- row-cyclic pivot strategy with de rijk's pivoting -#- swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_dgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_dgesvj. for sweeps i=1:swband the procedure ! ...... kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. swband = 0_${ik}$ pskipped = 0_${ik}$ loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! some blas implementations compute stdlib${ii}$_${ri}$nrm2(m,a(1,p),1) ! as sqrt(stdlib${ii}$_${ri}$dot(m,a(1,p),1,a(1,p),1)), which may result in ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_${ri}$nrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_${ri}$nrm2 is available, the if-then-else ! below should read "aapp = stdlib${ii}$_${ri}$nrm2( m, a(1,p), 1 ) * d(p)". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else temp1 = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp )*d( p ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then ! Rotate ! rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if end if else if( d( q )>=one ) then call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if else if( d( p )>=d( q ) ) then call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda, & ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! -#- m x 2 jacobi svd -#- ! -#- safe gram matrix computation -#- if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=${rk}$)*tol ) .and.( real( n,KIND=${rk}$)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of iterations. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given tolerance, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d. do p = 1, n - 1 q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_${ri}$gsvj0 #:endif #:endfor pure module subroutine stdlib${ii}$_cgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! CGSVJ0 is called from CGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as CGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(sp), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(sp), intent(out) :: work(lwork) real(sp), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(sp) :: aapq, ompq real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! from lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! .. row-cyclic jacobi svd algorithm with column pivoting .. emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. swband = 0_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_cgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_cgejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d(p) d(p) = d(q) d(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_cdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_scnrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_scnrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_scnrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_caxpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,& KIND=sp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_cgsvj0 pure module subroutine stdlib${ii}$_zgsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! ZGSVJ0 is called from ZGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(dp), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(dp), intent(out) :: work(lwork) real(dp), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(dp) :: aapq, ompq real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! from lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! .. row-cyclic jacobi svd algorithm with column pivoting .. emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. swband = 0_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_zgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_zgejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d(p) d(p) = d(q) d(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_zdotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_dznrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_dznrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_dznrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_zaxpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,& KIND=dp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_zgsvj0 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gsvj0( jobv, m, n, a, lda, d, sva, mv, v, ldv, eps,sfmin, tol, & !! ZGSVJ0: is called from ZGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but !! it does not check convergence (stopping criterion). Few tuning !! parameters (marked by [TP]) are available for the implementer. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, nsweep real(${ck}$), intent(in) :: eps, sfmin, tol character, intent(in) :: jobv ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(${ck}$), intent(out) :: work(lwork) real(${ck}$), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(${ck}$) :: aapq, ompq real(${ck}$) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, ierr, igl, ijblsk, ir1, iswrot, jbc, jgl, kbl, & lkahead, mvl, nbl, notrot, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! from lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( lda<m ) then info = -5_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -8_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -10_${ik}$ else if( tol<=eps ) then info = -13_${ik}$ else if( nsweep<0_${ik}$ ) then info = -14_${ik}$ else if( lwork<m ) then info = -16_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGSVJ0', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin bigtheta = one / rooteps roottol = sqrt( tol ) ! .. row-cyclic jacobi svd algorithm with column pivoting .. emptsw = ( n*( n-1 ) ) / 2_${ik}$ notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. swband = 0_${ik}$ ! [tp] swband is a tuning parameter [tp]. it is meaningful and effective ! if stdlib${ii}$_${ci}$gesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_${ci}$gejsv. for sweeps i=1:swband the procedure ! works on pivots inside a band-like region around the diagonal. ! the boundaries are determined dynamically, based on the number of ! pivots above a threshold. kbl = min( 8_${ik}$, n ) ! [tp] kbl is a tuning parameter that defines the tile size in the ! tiling of the p-q loops of pivot pairs. in general, an optimal ! value of kbl depends on the matrix dimensions and on the ! parameters of the computer's memory. nbl = n / kbl if( ( nbl*kbl )/=n )nbl = nbl + 1_${ik}$ blskip = kbl**2_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. lkahead = 1_${ik}$ ! [tp] lkahead is a tuning parameter. ! quasi block transformations, using the lower (upper) triangular ! structure of the input matrix. the quasi-block-cycling usually ! invokes cubic convergence. big part of this cycle is done inside ! canonical subspaces of dimensions less than m. ! .. row-cyclic pivot strategy with de rijk's pivoting .. loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nbl igl = ( ibr-1 )*kbl + 1_${ik}$ loop_1002: do ir1 = 0, min( lkahead, nbl-ibr ) igl = igl + ir1*kbl loop_2001: do p = igl, min( igl+kbl-1, n-1 ) ! .. de rijk's pivoting q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d(p) d(p) = d(q) d(q) = aapq end if if( ir1==0_${ik}$ ) then ! column norms are periodically updated by explicit ! norm computation. ! caveat: ! unfortunately, some blas implementations compute sncrm2(m,a(1,p),1) ! as sqrt(s=stdlib${ii}$_${ci}$dotc(m,a(1,p),1,a(1,p),1)), which may cause the result to ! overflow for ||a(:,p)||_2 > sqrt(overflow_threshold), and to ! underflow for ||a(:,p)||_2 < sqrt(underflow_threshold). ! hence, stdlib${ii}$_${c2ri(ci)}$znrm2 cannot be trusted, not even in the case when ! the true norm is far from the under(over)flow boundaries. ! if properly implemented stdlib${ii}$_${c2ri(ci)}$znrm2 is available, the if-then-else-end if ! below should be replaced with "aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a(1,p), 1 )". if( ( sva( p )<rootbig ) .and.( sva( p )>rootsfmin ) ) then sva( p ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else temp1 = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, temp1, aapp ) sva( p ) = temp1*sqrt( aapp ) end if aapp = sva( p ) else aapp = sva( p ) end if if( aapp>zero ) then pskipped = 0_${ik}$ loop_2002: do q = p + 1, min( igl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp if( aaqq>=one ) then rotok = ( small*aapp )<=aaqq if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else rotok = aapp<=( aaqq / small ) if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aapp ) / aaqq else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg( cwork(p) ) * cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) ! Rotate ! [rtd] rotated = rotated + one if( ir1==0_${ik}$ ) then notrot = 0_${ik}$ pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ end if if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/aapq1 if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if ( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if ( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one, m,1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) call stdlib${ii}$_${ci}$axpy( m, -aapq, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq, m,1_${ik}$, a( 1_${ik}$, q ), & lda, ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! recompute sva(q), sva(p). if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$ ) else t = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if else ! a(:,p) and a(:,q) already numerically orthogonal if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ end if else ! a(:,q) is zero column if( ir1==0_${ik}$ )notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then if( ir1==0_${ik}$ )aapp = -aapp notrot = 0_${ik}$ go to 2103 end if end do loop_2002 ! end q-loop 2103 continue ! bailed out of q-loop sva( p ) = aapp else sva( p ) = aapp if( ( ir1==0_${ik}$ ) .and. ( aapp==zero ) )notrot = notrot + min( igl+kbl-1, & n ) - p end if end do loop_2001 ! end of the p-loop ! end of doing the block ( ibr, ibr ) end do loop_1002 ! end of ir1-loop ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = ibr + 1, nbl jgl = ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${ck}$) )*tol ) .and. ( real( n,& KIND=${ck}$)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_${ci}$gsvj0 #:endif #:endfor pure module subroutine stdlib${ii}$_sgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! SGSVJ1 is called from SGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as SGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! SGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(sp), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments real(sp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) real(sp), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(sp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(sp) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=sp) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_sgesvj. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = 1, nblc jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! Safe Gram Matrix Computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_sdot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_sdot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq ) / aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_srotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_srotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_saxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_saxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_saxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_saxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_saxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_scopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_scopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_saxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_snrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if ! if ( notrot >= emptsw ) go to 2011 if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ ! ** if ( notrot >= emptsw ) go to 2011 end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** if ( notrot >= emptsw ) go to 1994 end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_snrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_slassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=sp)*tol ) .and.( real( n,KIND=sp)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of sweeps. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given threshold, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_sswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_sswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_sgsvj1 pure module subroutine stdlib${ii}$_dgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! DGSVJ1 is called from DGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! DGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(dp), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments real(dp), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) real(dp), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(dp) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(dp) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=dp) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_sgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_sgesvj. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = 1, nblc jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! Safe Gram Matrix Computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_ddot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_ddot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq) / aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_drotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_drotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_daxpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_daxpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_daxpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_daxpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_daxpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_dcopy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_dcopy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_daxpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_dlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if ! if ( notrot >= emptsw ) go to 2011 if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ ! ** if ( notrot >= emptsw ) go to 2011 end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** if ( notrot >= emptsw ) go to 1994 end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_dlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=dp)*tol ) .and.( real( n,KIND=dp)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of sweeps. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given threshold, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_dswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_dswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_dgsvj1 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$gsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! DGSVJ1: is called from DGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as DGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! DGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(${rk}$), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), d(n), sva(n), v(ldv,*) real(${rk}$), intent(out) :: work(lwork) ! ===================================================================== ! Local Scalars real(${rk}$) :: aapp, aapp0, aapq, aaqq, apoaq, aqoap, big, bigtheta, cs, large, mxaapq, & mxsinj, rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, & thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Local Arrays real(${rk}$) :: fastr(5_${ik}$) ! Intrinsic Functions ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin large = big / sqrt( real( m*n,KIND=${rk}$) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ fastr( 1_${ik}$ ) = zero ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_dgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_dgesvj. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ........................................................ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ loop_2010: do jbc = 1, nblc jgl = n1 + ( jbc-1 )*kbl + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! Safe Gram Matrix Computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, d( p ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, q ),1_${ik}$ )*d( q ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ri}$dot( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$,q ), 1_${ik}$ )*d( p )& *d( q ) / aaqq )/ aapp else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, d( q ),m, 1_${ik}$, work, lda,& ierr ) aapq = stdlib${ii}$_${ri}$dot( m, work, 1_${ik}$, a( 1_${ik}$, p ),1_${ik}$ )*d( p ) / & aapp end if end if mxaapq = max( mxaapq, abs( aapq ) ) ! to rotate or not to rotate, that is the question ... if( abs( aapq )>tol ) then notrot = 0_${ik}$ ! rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs(aqoap-apoaq) / aapq if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta fastr( 3_${ik}$ ) = t*d( p ) / d( q ) fastr( 4_${ik}$ ) = -t*d( q ) / d( p ) call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$, fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$,v( 1_${ik}$, q ),& 1_${ik}$,fastr ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq ) ) apoaq = d( p ) / d( q ) aqoap = d( q ) / d( p ) if( d( p )>=one ) then if( d( q )>=one ) then fastr( 3_${ik}$ ) = t*apoaq fastr( 4_${ik}$ ) = -t*aqoap d( p ) = d( p )*cs d( q ) = d( q )*cs call stdlib${ii}$_${ri}$rotm( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$,& fastr ) if( rsvec )call stdlib${ii}$_${ri}$rotm( mvl,v( 1_${ik}$, p ), 1_${ik}$, v( & 1_${ik}$, q ),1_${ik}$, fastr ) else call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( 1_${ik}$, & p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( & 1_${ik}$, q ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, -t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,v(& 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ), 1_${ik}$,& v( 1_${ik}$, q ), 1_${ik}$ ) end if d( p ) = d( p )*cs d( q ) = d( q ) / cs end if else if( d( q )>=one ) then call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q & ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, -cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl, t*apoaq,v( 1_${ik}$, p ), 1_${ik}$,v( & 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q ), & 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if d( p ) = d( p ) / cs d( q ) = d( q )*cs else if( d( p )>=d( q ) ) then call stdlib${ii}$_${ri}$axpy( m, -t*aqoap,a( 1_${ik}$, q ), 1_${ik}$,a( & 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m, cs*sn*apoaq,a( 1_${ik}$, p ), 1_${ik}$,& a( 1_${ik}$, q ), 1_${ik}$ ) d( p ) = d( p )*cs d( q ) = d( q ) / cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,-t*aqoap,v( 1_${ik}$, q ), 1_${ik}$,& v( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,cs*sn*apoaq,v( 1_${ik}$, p ),& 1_${ik}$,v( 1_${ik}$, q ), 1_${ik}$ ) end if else call stdlib${ii}$_${ri}$axpy( m, t*apoaq,a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$,& q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( m,-cs*sn*aqoap,a( 1_${ik}$, q ), 1_${ik}$,& a( 1_${ik}$, p ), 1_${ik}$ ) d( p ) = d( p ) / cs d( q ) = d( q )*cs if( rsvec ) then call stdlib${ii}$_${ri}$axpy( mvl,t*apoaq, v( 1_${ik}$, p ),1_${ik}$, & v( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( mvl,-cs*sn*aqoap,v( 1_${ik}$, q )& , 1_${ik}$,v( 1_${ik}$, p ), 1_${ik}$ ) end if end if end if end if end if else if( aapp>aaqq ) then call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, p ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) temp1 = -aapq*d( p ) / d( q ) call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ri}$copy( m, a( 1_${ik}$, q ), 1_${ik}$, work,1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work, lda,& ierr ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) temp1 = -aapq*d( q ) / d( p ) call stdlib${ii}$_${ri}$axpy( m, temp1, work, 1_${ik}$,a( 1_${ik}$, p ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq*aapq ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q) ! .. recompute sva(q) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, q ), 1_${ik}$ )*d( q ) else t = zero aaqq = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq )*d( q ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, p ), 1_${ik}$ )*d( p ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp )*d( p ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if ! if ( notrot >= emptsw ) go to 2011 if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ ! ** if ( notrot >= emptsw ) go to 2011 end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** if ( notrot >= emptsw ) go to 1994 end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${ri}$nrm2( m, a( 1_${ik}$, n ), 1_${ik}$ )*d( n ) else t = zero aapp = one call stdlib${ii}$_${ri}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp )*d( n ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<real( n,KIND=${rk}$)*tol ) .and.( real( n,KIND=${rk}$)& *mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:) reaching this point means that the procedure has completed the given ! number of sweeps. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means that during the i-th sweep all pivots were ! below the given threshold, causing early exit. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector d do p = 1, n - 1 q = stdlib${ii}$_i${ri}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 temp1 = d( p ) d( p ) = d( q ) d( q ) = temp1 call stdlib${ii}$_${ri}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ri}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_${ri}$gsvj1 #:endif #:endfor pure module subroutine stdlib${ii}$_cgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! CGSVJ1 is called from CGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as CGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! CGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(sp), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments complex(sp), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(sp), intent(out) :: work(lwork) real(sp), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(sp) :: aapq, ompq real(sp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! From Lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=sp) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_cgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_cgejsv. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ ! do 2010 jbc = ibr + 1, nbl loop_2010: do jbc = 1, nblc jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_cdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_crot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_crot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_ccopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_ccopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_caxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_clascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_classq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_scnrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_classq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=sp) )*tol ) .and. ( real( n,& KIND=sp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_isamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_cswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_cswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_cgsvj1 pure module subroutine stdlib${ii}$_zgsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! ZGSVJ1 is called from ZGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(dp), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments complex(dp), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(dp), intent(out) :: work(lwork) real(dp), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(dp) :: aapq, ompq real(dp) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! From Lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=dp) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_zgesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_zgejsv. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ ! do 2010 jbc = ibr + 1, nbl loop_2010: do jbc = 1, nblc jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_zdotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_zrot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_zrot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_zcopy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_zcopy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_zaxpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_zlascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_dznrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_zlassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=dp) )*tol ) .and. ( real( n,& KIND=dp)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_idamax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_zswap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_zswap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_zgsvj1 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$gsvj1( jobv, m, n, n1, a, lda, d, sva, mv, v, ldv,eps, sfmin, tol, & !! ZGSVJ1: is called from ZGESVJ as a pre-processor and that is its main !! purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but !! it targets only particular pivots and it does not check convergence !! (stopping criterion). Few tuning parameters (marked by [TP]) are !! available for the implementer. !! Further Details !! ~~~~~~~~~~~~~~~ !! ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of !! the input M-by-N matrix A. The pivot pairs are taken from the (1,2) !! off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The !! block-entries (tiles) of the (1,2) off-diagonal block are marked by the !! [x]'s in the following scheme: !! | * * * [x] [x] [x]| !! | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. !! | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! |[x] [x] [x] * * * | !! In terms of the columns of A, the first N1 columns are rotated 'against' !! the remaining N-N1 columns, trying to increase the angle between the !! corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is !! tiled using quadratic tiles of side KBL. Here, KBL is a tuning parameter. !! The number of sweeps is given in NSWEEP and the orthogonality threshold !! is given in TOL. nsweep, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(${ck}$), intent(in) :: eps, sfmin, tol integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lda, ldv, lwork, m, mv, n, n1, nsweep character, intent(in) :: jobv ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), d(n), v(ldv,*) complex(${ck}$), intent(out) :: work(lwork) real(${ck}$), intent(inout) :: sva(n) ! ===================================================================== ! Local Scalars complex(${ck}$) :: aapq, ompq real(${ck}$) :: aapp, aapp0, aapq1, aaqq, apoaq, aqoap, big, bigtheta, cs, mxaapq, mxsinj, & rootbig, rooteps, rootsfmin, roottol, small, sn, t, temp1, theta, thsign integer(${ik}$) :: blskip, emptsw, i, ibr, igl, ierr, ijblsk, iswrot, jbc, jgl, kbl, mvl, & notrot, nblc, nblr, p, pskipped, q, rowskip, swband logical(lk) :: applv, rotok, rsvec ! Intrinsic Functions ! From Lapack ! Executable Statements ! test the input parameters. applv = stdlib_lsame( jobv, 'A' ) rsvec = stdlib_lsame( jobv, 'V' ) if( .not.( rsvec .or. applv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( ( n<0_${ik}$ ) .or. ( n>m ) ) then info = -3_${ik}$ else if( n1<0_${ik}$ ) then info = -4_${ik}$ else if( lda<m ) then info = -6_${ik}$ else if( ( rsvec.or.applv ) .and. ( mv<0_${ik}$ ) ) then info = -9_${ik}$ else if( ( rsvec.and.( ldv<n ) ).or.( applv.and.( ldv<mv ) ) ) then info = -11_${ik}$ else if( tol<=eps ) then info = -14_${ik}$ else if( nsweep<0_${ik}$ ) then info = -15_${ik}$ else if( lwork<m ) then info = -17_${ik}$ else info = 0_${ik}$ end if ! #:( if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZGSVJ1', -info ) return end if if( rsvec ) then mvl = n else if( applv ) then mvl = mv end if rsvec = rsvec .or. applv rooteps = sqrt( eps ) rootsfmin = sqrt( sfmin ) small = sfmin / eps big = one / sfmin rootbig = one / rootsfmin ! large = big / sqrt( real( m*n,KIND=${ck}$) ) bigtheta = one / rooteps roottol = sqrt( tol ) ! Initialize The Right Singular Vector Matrix ! rsvec = stdlib_lsame( jobv, 'y' ) emptsw = n1*( n-n1 ) notrot = 0_${ik}$ ! .. row-cyclic pivot strategy with de rijk's pivoting .. kbl = min( 8_${ik}$, n ) nblr = n1 / kbl if( ( nblr*kbl )/=n1 )nblr = nblr + 1_${ik}$ ! .. the tiling is nblr-by-nblc [tiles] nblc = ( n-n1 ) / kbl if( ( nblc*kbl )/=( n-n1 ) )nblc = nblc + 1_${ik}$ blskip = ( kbl**2_${ik}$ ) + 1_${ik}$ ! [tp] blkskip is a tuning parameter that depends on swband and kbl. rowskip = min( 5_${ik}$, kbl ) ! [tp] rowskip is a tuning parameter. swband = 0_${ik}$ ! [tp] swband is a tuning parameter. it is meaningful and effective ! if stdlib${ii}$_${ci}$gesvj is used as a computational routine in the preconditioned ! jacobi svd algorithm stdlib${ii}$_${ci}$gejsv. ! | * * * [x] [x] [x]| ! | * * * [x] [x] [x]| row-cycling in the nblr-by-nblc [x] blocks. ! | * * * [x] [x] [x]| row-cyclic pivoting inside each [x] block. ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | ! |[x] [x] [x] * * * | loop_1993: do i = 1, nsweep ! .. go go go ... mxaapq = zero mxsinj = zero iswrot = 0_${ik}$ notrot = 0_${ik}$ pskipped = 0_${ik}$ ! each sweep is unrolled using kbl-by-kbl tiles over the pivot pairs ! 1 <= p < q <= n. this is the first step toward a blocked implementation ! of the rotations. new implementation, based on block transformations, ! is under development. loop_2000: do ibr = 1, nblr igl = ( ibr-1 )*kbl + 1_${ik}$ ! ... go to the off diagonal blocks igl = ( ibr-1 )*kbl + 1_${ik}$ ! do 2010 jbc = ibr + 1, nbl loop_2010: do jbc = 1, nblc jgl = ( jbc-1 )*kbl + n1 + 1_${ik}$ ! doing the block at ( ibr, jbc ) ijblsk = 0_${ik}$ loop_2100: do p = igl, min( igl+kbl-1, n1 ) aapp = sva( p ) if( aapp>zero ) then pskipped = 0_${ik}$ loop_2200: do q = jgl, min( jgl+kbl-1, n ) aaqq = sva( q ) if( aaqq>zero ) then aapp0 = aapp ! M X 2 Jacobi Svd ! safe gram matrix computation if( aaqq>=one ) then if( aapp>=aaqq ) then rotok = ( small*aapp )<=aaqq else rotok = ( small*aaqq )<=aapp end if if( aapp<( big / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq ) / aapp else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, work, 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / & aaqq end if else if( aapp>=aaqq ) then rotok = aapp<=( aaqq / small ) else rotok = aaqq<=( aapp / small ) end if if( aapp>( small / aaqq ) ) then aapq = ( stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,a( 1_${ik}$, q ), 1_${ik}$ ) / max(& aaqq,aapp) )/ min(aaqq,aapp) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq,one, m, 1_${ik}$,work, lda, & ierr ) aapq = stdlib${ii}$_${ci}$dotc( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) / & aapp end if end if ! aapq = aapq * conjg(cwork(p))*cwork(q) aapq1 = -abs(aapq) mxaapq = max( mxaapq, -aapq1 ) ! to rotate or not to rotate, that is the question ... if( abs( aapq1 )>tol ) then ompq = aapq / abs(aapq) notrot = 0_${ik}$ ! [rtd] rotated = rotated + 1 pskipped = 0_${ik}$ iswrot = iswrot + 1_${ik}$ if( rotok ) then aqoap = aaqq / aapp apoaq = aapp / aaqq theta = -half*abs( aqoap-apoaq )/ aapq1 if( aaqq>aapp0 )theta = -theta if( abs( theta )>bigtheta ) then t = half / theta cs = one call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *t ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*t ) end if sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) mxsinj = max( mxsinj, abs( t ) ) else ! Choose Correct Signum For Theta And Rotate thsign = -sign( one, aapq1 ) if( aaqq>aapp0 )thsign = -thsign t = one / ( theta+thsign*sqrt( one+theta*theta ) ) cs = sqrt( one / ( one+t*t ) ) sn = t*cs mxsinj = max( mxsinj, abs( sn ) ) sva( q ) = aaqq*sqrt( max( zero,one+t*apoaq*aapq1 ) ) aapp = aapp*sqrt( max( zero,one-t*aqoap*aapq1 ) ) call stdlib${ii}$_${ci}$rot( m, a(1_${ik}$,p), 1_${ik}$, a(1_${ik}$,q), 1_${ik}$,cs, conjg(ompq)& *sn ) if( rsvec ) then call stdlib${ii}$_${ci}$rot( mvl, v(1_${ik}$,p), 1_${ik}$,v(1_${ik}$,q), 1_${ik}$, cs, & conjg(ompq)*sn ) end if end if d(p) = -d(q) * ompq else ! .. have to use modified gram-schmidt like transformation if( aapp>aaqq ) then call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, p ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -aapq, work,1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aaqq,m, 1_${ik}$, a( 1_${ik}$, q ),& lda,ierr ) sva( q ) = aaqq*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) else call stdlib${ii}$_${ci}$copy( m, a( 1_${ik}$, q ), 1_${ik}$,work, 1_${ik}$ ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aaqq, one,m, 1_${ik}$, work,lda,& ierr ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, aapp, one,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) call stdlib${ii}$_${ci}$axpy( m, -conjg(aapq),work, 1_${ik}$, a( 1_${ik}$, p ), 1_${ik}$ & ) call stdlib${ii}$_${ci}$lascl( 'G', 0_${ik}$, 0_${ik}$, one, aapp,m, 1_${ik}$, a( 1_${ik}$, p ),& lda,ierr ) sva( p ) = aapp*sqrt( max( zero,one-aapq1*aapq1 ) ) mxsinj = max( mxsinj, sfmin ) end if end if ! end if rotok then ... else ! in the case of cancellation in updating sva(q), sva(p) ! .. recompute sva(q), sva(p) if( ( sva( q ) / aaqq )**2_${ik}$<=rooteps )then if( ( aaqq<rootbig ) .and.( aaqq>rootsfmin ) ) then sva( q ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, q ), 1_${ik}$) else t = zero aaqq = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, q ), 1_${ik}$, t,aaqq ) sva( q ) = t*sqrt( aaqq ) end if end if if( ( aapp / aapp0 )**2_${ik}$<=rooteps ) then if( ( aapp<rootbig ) .and.( aapp>rootsfmin ) ) then aapp = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, p ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, p ), 1_${ik}$, t,aapp ) aapp = t*sqrt( aapp ) end if sva( p ) = aapp end if ! end of ok rotation else notrot = notrot + 1_${ik}$ ! [rtd] skipped = skipped + 1 pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if else notrot = notrot + 1_${ik}$ pskipped = pskipped + 1_${ik}$ ijblsk = ijblsk + 1_${ik}$ end if if( ( i<=swband ) .and. ( ijblsk>=blskip ) )then sva( p ) = aapp notrot = 0_${ik}$ go to 2011 end if if( ( i<=swband ) .and.( pskipped>rowskip ) ) then aapp = -aapp notrot = 0_${ik}$ go to 2203 end if end do loop_2200 ! end of the q-loop 2203 continue sva( p ) = aapp else if( aapp==zero )notrot = notrot +min( jgl+kbl-1, n ) - jgl + 1_${ik}$ if( aapp<zero )notrot = 0_${ik}$ end if end do loop_2100 ! end of the p-loop end do loop_2010 ! end of the jbc-loop 2011 continue ! 2011 bailed out of the jbc-loop do p = igl, min( igl+kbl-1, n ) sva( p ) = abs( sva( p ) ) end do ! ** end do loop_2000 ! 2000 :: end of the ibr-loop ! .. update sva(n) if( ( sva( n )<rootbig ) .and. ( sva( n )>rootsfmin ) )then sva( n ) = stdlib${ii}$_${c2ri(ci)}$znrm2( m, a( 1_${ik}$, n ), 1_${ik}$ ) else t = zero aapp = one call stdlib${ii}$_${ci}$lassq( m, a( 1_${ik}$, n ), 1_${ik}$, t, aapp ) sva( n ) = t*sqrt( aapp ) end if ! additional steering devices if( ( i<swband ) .and. ( ( mxaapq<=roottol ) .or.( iswrot<=n ) ) )swband = i if( ( i>swband+1 ) .and. ( mxaapq<sqrt( real( n,KIND=${ck}$) )*tol ) .and. ( real( n,& KIND=${ck}$)*mxaapq*mxsinj<tol ) ) then go to 1994 end if if( notrot>=emptsw )go to 1994 end do loop_1993 ! end i=1:nsweep loop ! #:( reaching this point means that the procedure has not converged. info = nsweep - 1_${ik}$ go to 1995 1994 continue ! #:) reaching this point means numerical convergence after the i-th ! sweep. info = 0_${ik}$ ! #:) info = 0 confirms successful iterations. 1995 continue ! sort the vector sva() of column norms. do p = 1, n - 1 q = stdlib${ii}$_i${c2ri(ci)}$amax( n-p+1, sva( p ), 1_${ik}$ ) + p - 1_${ik}$ if( p/=q ) then temp1 = sva( p ) sva( p ) = sva( q ) sva( q ) = temp1 aapq = d( p ) d( p ) = d( q ) d( q ) = aapq call stdlib${ii}$_${ci}$swap( m, a( 1_${ik}$, p ), 1_${ik}$, a( 1_${ik}$, q ), 1_${ik}$ ) if( rsvec )call stdlib${ii}$_${ci}$swap( mvl, v( 1_${ik}$, p ), 1_${ik}$, v( 1_${ik}$, q ), 1_${ik}$ ) end if end do return end subroutine stdlib${ii}$_${ci}$gsvj1 #:endif #:endfor pure module subroutine stdlib${ii}$_stgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! STGSJA computes the generalized singular value decomposition (GSVD) !! of two real upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine SGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), !! where U, V and Q are orthogonal matrices. !! R is a nonsingular upper triangular matrix, and D1 and D2 are !! ``diagonal'' matrices, which are of the following structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the orthogonal transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(sp), intent(in) :: tola, tolb ! Array Arguments real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) real(sp), intent(out) :: alpha(*), beta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(sp), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(sp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & ssmin ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'STGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_slaset( 'FULL', m, m, zero, one, u, ldu ) if( initv )call stdlib${ii}$_slaset( 'FULL', p, p, zero, one, v, ldv ) if( initq )call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = zero a3 = zero if( k+i<=m )a1 = a( k+i, n-l+i ) if( k+j<=m )a3 = a( k+j, n-l+j ) b1 = b( i, n-l+i ) b3 = b( j, n-l+j ) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_slags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a if( k+j<=m )call stdlib${ii}$_srot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, snu ) ! update i-th and j-th rows of matrix b: v**t *b call stdlib${ii}$_srot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_srot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_srot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = zero b( i, n-l+j ) = zero else if( k+j<=m )a( k+j, n-l+i ) = zero b( j, n-l+i ) = zero end if ! update orthogonal matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_srot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_srot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_srot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_scopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_slapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = a( k+i, n-l+i ) b1 = b( i, n-l+i ) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then ! change sign if necessary if( gamma<zero ) then call stdlib${ii}$_sscal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_sscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_slartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_sscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_sscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_scopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_stgsja pure module subroutine stdlib${ii}$_dtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! DTGSJA computes the generalized singular value decomposition (GSVD) !! of two real upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine DGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), !! where U, V and Q are orthogonal matrices. !! R is a nonsingular upper triangular matrix, and D1 and D2 are !! ``diagonal'' matrices, which are of the following structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the orthogonal transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(dp), intent(in) :: tola, tolb ! Array Arguments real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) real(dp), intent(out) :: alpha(*), beta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(dp), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(dp) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & ssmin ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DTGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_dlaset( 'FULL', m, m, zero, one, u, ldu ) if( initv )call stdlib${ii}$_dlaset( 'FULL', p, p, zero, one, v, ldv ) if( initq )call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = zero a3 = zero if( k+i<=m )a1 = a( k+i, n-l+i ) if( k+j<=m )a3 = a( k+j, n-l+j ) b1 = b( i, n-l+i ) b3 = b( j, n-l+j ) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_dlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a if( k+j<=m )call stdlib${ii}$_drot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, snu ) ! update i-th and j-th rows of matrix b: v**t *b call stdlib${ii}$_drot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_drot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_drot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = zero b( i, n-l+j ) = zero else if( k+j<=m )a( k+j, n-l+i ) = zero b( j, n-l+i ) = zero end if ! update orthogonal matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_drot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_drot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_drot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_dcopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_dlapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = a( k+i, n-l+i ) b1 = b( i, n-l+i ) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then ! change sign if necessary if( gamma<zero ) then call stdlib${ii}$_dscal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_dscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_dlartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_dscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_dscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_dcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_dtgsja #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$tgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! DTGSJA: computes the generalized singular value decomposition (GSVD) !! of two real upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine DGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), !! where U, V and Q are orthogonal matrices. !! R is a nonsingular upper triangular matrix, and D1 and D2 are !! ``diagonal'' matrices, which are of the following structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the orthogonal transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(${rk}$), intent(in) :: tola, tolb ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) real(${rk}$), intent(out) :: alpha(*), beta(*), work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(${rk}$), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(${rk}$) :: a1, a2, a3, b1, b2, b3, csq, csu, csv, error, gamma, rwk, snq, snu, snv, & ssmin ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DTGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_${ri}$laset( 'FULL', m, m, zero, one, u, ldu ) if( initv )call stdlib${ii}$_${ri}$laset( 'FULL', p, p, zero, one, v, ldv ) if( initq )call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = zero a3 = zero if( k+i<=m )a1 = a( k+i, n-l+i ) if( k+j<=m )a3 = a( k+j, n-l+j ) b1 = b( i, n-l+i ) b3 = b( j, n-l+j ) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_${ri}$lags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**t *a if( k+j<=m )call stdlib${ii}$_${ri}$rot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, snu ) ! update i-th and j-th rows of matrix b: v**t *b call stdlib${ii}$_${ri}$rot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, snv ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_${ri}$rot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_${ri}$rot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = zero b( i, n-l+j ) = zero else if( k+j<=m )a( k+j, n-l+i ) = zero b( j, n-l+i ) = zero end if ! update orthogonal matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_${ri}$rot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_${ri}$rot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_${ri}$copy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$lapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = a( k+i, n-l+i ) b1 = b( i, n-l+i ) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then ! change sign if necessary if( gamma<zero ) then call stdlib${ii}$_${ri}$scal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_${ri}$scal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_${ri}$lartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_${ri}$scal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_${ri}$scal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_${ri}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_${ri}$tgsja #:endif #:endfor pure module subroutine stdlib${ii}$_ctgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! CTGSJA computes the generalized singular value decomposition (GSVD) !! of two complex upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine CGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), !! where U, V and Q are unitary matrices. !! R is a nonsingular upper triangular matrix, and D1 !! and D2 are ``diagonal'' matrices, which are of the following !! structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the unitary transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(sp), intent(in) :: tola, tolb ! Array Arguments real(sp), intent(out) :: alpha(*), beta(*) complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(sp), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(sp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin complex(sp) :: a2, b2, snq, snu, snv ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CTGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_claset( 'FULL', m, m, czero, cone, u, ldu ) if( initv )call stdlib${ii}$_claset( 'FULL', p, p, czero, cone, v, ldv ) if( initq )call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = czero a3 = zero if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=sp) if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=sp) b1 = real( b( i, n-l+i ),KIND=sp) b3 = real( b( j, n-l+j ),KIND=sp) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_clags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a if( k+j<=m )call stdlib${ii}$_crot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, conjg( snu ) ) ! update i-th and j-th rows of matrix b: v**h *b call stdlib${ii}$_crot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) & ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_crot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_crot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = czero b( i, n-l+j ) = czero else if( k+j<=m )a( k+j, n-l+i ) = czero b( j, n-l+i ) = czero end if ! ensure that the diagonal elements of a and b are real. if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=sp) if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=sp) b( i, n-l+i ) = real( b( i, n-l+i ),KIND=sp) b( j, n-l+j ) = real( b( j, n-l+j ),KIND=sp) ! update unitary matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_crot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_crot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_crot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_ccopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_clapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = real( a( k+i, n-l+i ),KIND=sp) b1 = real( b( i, n-l+i ),KIND=sp) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then if( gamma<zero ) then call stdlib${ii}$_csscal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_csscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_slartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_csscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_csscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_ccopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_ctgsja pure module subroutine stdlib${ii}$_ztgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! ZTGSJA computes the generalized singular value decomposition (GSVD) !! of two complex upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine ZGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), !! where U, V and Q are unitary matrices. !! R is a nonsingular upper triangular matrix, and D1 !! and D2 are ``diagonal'' matrices, which are of the following !! structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the unitary transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(dp), intent(in) :: tola, tolb ! Array Arguments real(dp), intent(out) :: alpha(*), beta(*) complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(dp), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(dp) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin complex(dp) :: a2, b2, snq, snu, snv ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZTGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_zlaset( 'FULL', m, m, czero, cone, u, ldu ) if( initv )call stdlib${ii}$_zlaset( 'FULL', p, p, czero, cone, v, ldv ) if( initq )call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = czero a3 = zero if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=dp) if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=dp) b1 = real( b( i, n-l+i ),KIND=dp) b3 = real( b( j, n-l+j ),KIND=dp) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_zlags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a if( k+j<=m )call stdlib${ii}$_zrot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, conjg( snu ) ) ! update i-th and j-th rows of matrix b: v**h *b call stdlib${ii}$_zrot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) & ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_zrot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_zrot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = czero b( i, n-l+j ) = czero else if( k+j<=m )a( k+j, n-l+i ) = czero b( j, n-l+i ) = czero end if ! ensure that the diagonal elements of a and b are real. if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=dp) if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=dp) b( i, n-l+i ) = real( b( i, n-l+i ),KIND=dp) b( j, n-l+j ) = real( b( j, n-l+j ),KIND=dp) ! update unitary matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_zrot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_zrot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_zrot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_zcopy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_zlapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = real( a( k+i, n-l+i ),KIND=dp) b1 = real( b( i, n-l+i ),KIND=dp) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then if( gamma<zero ) then call stdlib${ii}$_zdscal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_zdscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_dlartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_zdscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_zdscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_zcopy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_ztgsja #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$tgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b,ldb, tola, tolb, & !! ZTGSJA: computes the generalized singular value decomposition (GSVD) !! of two complex upper triangular (or trapezoidal) matrices A and B. !! On entry, it is assumed that matrices A and B have the following !! forms, which may be obtained by the preprocessing subroutine ZGGSVP !! from a general M-by-N matrix A and P-by-N matrix B: !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L >= 0; !! L ( 0 0 A23 ) !! M-K-L ( 0 0 0 ) !! N-K-L K L !! A = K ( 0 A12 A13 ) if M-K-L < 0; !! M-K ( 0 0 A23 ) !! N-K-L K L !! B = L ( 0 0 B13 ) !! P-L ( 0 0 0 ) !! where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular !! upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, !! otherwise A23 is (M-K)-by-L upper trapezoidal. !! On exit, !! U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), !! where U, V and Q are unitary matrices. !! R is a nonsingular upper triangular matrix, and D1 !! and D2 are ``diagonal'' matrices, which are of the following !! structures: !! If M-K-L >= 0, !! K L !! D1 = K ( I 0 ) !! L ( 0 C ) !! M-K-L ( 0 0 ) !! K L !! D2 = L ( 0 S ) !! P-L ( 0 0 ) !! N-K-L K L !! ( 0 R ) = K ( 0 R11 R12 ) K !! L ( 0 0 R22 ) L !! where !! C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), !! S = diag( BETA(K+1), ... , BETA(K+L) ), !! C**2 + S**2 = I. !! R is stored in A(1:K+L,N-K-L+1:N) on exit. !! If M-K-L < 0, !! K M-K K+L-M !! D1 = K ( I 0 0 ) !! M-K ( 0 C 0 ) !! K M-K K+L-M !! D2 = M-K ( 0 S 0 ) !! K+L-M ( 0 0 I ) !! P-L ( 0 0 0 ) !! N-K-L K M-K K+L-M !! ( 0 R ) = K ( 0 R11 R12 R13 ) !! M-K ( 0 0 R22 R23 ) !! K+L-M ( 0 0 0 R33 ) !! where !! C = diag( ALPHA(K+1), ... , ALPHA(M) ), !! S = diag( BETA(K+1), ... , BETA(M) ), !! C**2 + S**2 = I. !! R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored !! ( 0 R22 R23 ) !! in B(M-K+1:L,N+M-K-L+1:N) on exit. !! The computation of the unitary transformation matrices U, V or Q !! is optional. These matrices may either be formed explicitly, or they !! may be postmultiplied into input matrices U1, V1, or Q1. alpha, beta, u, ldu, v, ldv,q, ldq, work, ncycle, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobq, jobu, jobv integer(${ik}$), intent(out) :: info, ncycle integer(${ik}$), intent(in) :: k, l, lda, ldb, ldq, ldu, ldv, m, n, p real(${ck}$), intent(in) :: tola, tolb ! Array Arguments real(${ck}$), intent(out) :: alpha(*), beta(*) complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), u(ldu,*), v(ldv,*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Parameters integer(${ik}$), parameter :: maxit = 40_${ik}$ real(${ck}$), parameter :: hugenum = huge(zero) ! Local Scalars logical(lk) :: initq, initu, initv, upper, wantq, wantu, wantv integer(${ik}$) :: i, j, kcycle real(${ck}$) :: a1, a3, b1, b3, csq, csu, csv, error, gamma, rwk, ssmin complex(${ck}$) :: a2, b2, snq, snu, snv ! Intrinsic Functions ! Executable Statements ! decode and test the input parameters initu = stdlib_lsame( jobu, 'I' ) wantu = initu .or. stdlib_lsame( jobu, 'U' ) initv = stdlib_lsame( jobv, 'I' ) wantv = initv .or. stdlib_lsame( jobv, 'V' ) initq = stdlib_lsame( jobq, 'I' ) wantq = initq .or. stdlib_lsame( jobq, 'Q' ) info = 0_${ik}$ if( .not.( initu .or. wantu .or. stdlib_lsame( jobu, 'N' ) ) ) then info = -1_${ik}$ else if( .not.( initv .or. wantv .or. stdlib_lsame( jobv, 'N' ) ) ) then info = -2_${ik}$ else if( .not.( initq .or. wantq .or. stdlib_lsame( jobq, 'N' ) ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( p<0_${ik}$ ) then info = -5_${ik}$ else if( n<0_${ik}$ ) then info = -6_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -10_${ik}$ else if( ldb<max( 1_${ik}$, p ) ) then info = -12_${ik}$ else if( ldu<1_${ik}$ .or. ( wantu .and. ldu<m ) ) then info = -18_${ik}$ else if( ldv<1_${ik}$ .or. ( wantv .and. ldv<p ) ) then info = -20_${ik}$ else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then info = -22_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZTGSJA', -info ) return end if ! initialize u, v and q, if necessary if( initu )call stdlib${ii}$_${ci}$laset( 'FULL', m, m, czero, cone, u, ldu ) if( initv )call stdlib${ii}$_${ci}$laset( 'FULL', p, p, czero, cone, v, ldv ) if( initq )call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, q, ldq ) ! loop until convergence upper = .false. loop_40: do kcycle = 1, maxit upper = .not.upper loop_20: do i = 1, l - 1 loop_10: do j = i + 1, l a1 = zero a2 = czero a3 = zero if( k+i<=m )a1 = real( a( k+i, n-l+i ),KIND=${ck}$) if( k+j<=m )a3 = real( a( k+j, n-l+j ),KIND=${ck}$) b1 = real( b( i, n-l+i ),KIND=${ck}$) b3 = real( b( j, n-l+j ),KIND=${ck}$) if( upper ) then if( k+i<=m )a2 = a( k+i, n-l+j ) b2 = b( i, n-l+j ) else if( k+j<=m )a2 = a( k+j, n-l+i ) b2 = b( j, n-l+i ) end if call stdlib${ii}$_${ci}$lags2( upper, a1, a2, a3, b1, b2, b3, csu, snu,csv, snv, csq, & snq ) ! update (k+i)-th and (k+j)-th rows of matrix a: u**h *a if( k+j<=m )call stdlib${ii}$_${ci}$rot( l, a( k+j, n-l+1 ), lda, a( k+i, n-l+1 ),lda, & csu, conjg( snu ) ) ! update i-th and j-th rows of matrix b: v**h *b call stdlib${ii}$_${ci}$rot( l, b( j, n-l+1 ), ldb, b( i, n-l+1 ), ldb,csv, conjg( snv ) & ) ! update (n-l+i)-th and (n-l+j)-th columns of matrices ! a and b: a*q and b*q call stdlib${ii}$_${ci}$rot( min( k+l, m ), a( 1_${ik}$, n-l+j ), 1_${ik}$,a( 1_${ik}$, n-l+i ), 1_${ik}$, csq, snq ) call stdlib${ii}$_${ci}$rot( l, b( 1_${ik}$, n-l+j ), 1_${ik}$, b( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) if( upper ) then if( k+i<=m )a( k+i, n-l+j ) = czero b( i, n-l+j ) = czero else if( k+j<=m )a( k+j, n-l+i ) = czero b( j, n-l+i ) = czero end if ! ensure that the diagonal elements of a and b are real. if( k+i<=m )a( k+i, n-l+i ) = real( a( k+i, n-l+i ),KIND=${ck}$) if( k+j<=m )a( k+j, n-l+j ) = real( a( k+j, n-l+j ),KIND=${ck}$) b( i, n-l+i ) = real( b( i, n-l+i ),KIND=${ck}$) b( j, n-l+j ) = real( b( j, n-l+j ),KIND=${ck}$) ! update unitary matrices u, v, q, if desired. if( wantu .and. k+j<=m )call stdlib${ii}$_${ci}$rot( m, u( 1_${ik}$, k+j ), 1_${ik}$, u( 1_${ik}$, k+i ), 1_${ik}$, & csu,snu ) if( wantv )call stdlib${ii}$_${ci}$rot( p, v( 1_${ik}$, j ), 1_${ik}$, v( 1_${ik}$, i ), 1_${ik}$, csv, snv ) if( wantq )call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, n-l+j ), 1_${ik}$, q( 1_${ik}$, n-l+i ), 1_${ik}$, csq,snq ) end do loop_10 end do loop_20 if( .not.upper ) then ! the matrices a13 and b13 were lower triangular at the start ! of the cycle, and are now upper triangular. ! convergence test: test the parallelism of the corresponding ! rows of a and b. error = zero do i = 1, min( l, m-k ) call stdlib${ii}$_${ci}$copy( l-i+1, a( k+i, n-l+i ), lda, work, 1_${ik}$ ) call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, work( l+1 ), 1_${ik}$ ) call stdlib${ii}$_${ci}$lapll( l-i+1, work, 1_${ik}$, work( l+1 ), 1_${ik}$, ssmin ) error = max( error, ssmin ) end do if( abs( error )<=min( tola, tolb ) )go to 50 end if ! end of cycle loop end do loop_40 ! the algorithm has not converged after maxit cycles. info = 1_${ik}$ go to 100 50 continue ! if error <= min(tola,tolb), then the algorithm has converged. ! compute the generalized singular value pairs (alpha, beta), and ! set the triangular matrix r to array a. do i = 1, k alpha( i ) = one beta( i ) = zero end do do i = 1, min( l, m-k ) a1 = real( a( k+i, n-l+i ),KIND=${ck}$) b1 = real( b( i, n-l+i ),KIND=${ck}$) gamma = b1 / a1 if( (gamma<=hugenum).and.(gamma>=-hugenum) ) then if( gamma<zero ) then call stdlib${ii}$_${ci}$dscal( l-i+1, -one, b( i, n-l+i ), ldb ) if( wantv )call stdlib${ii}$_${ci}$dscal( p, -one, v( 1_${ik}$, i ), 1_${ik}$ ) end if call stdlib${ii}$_${c2ri(ci)}$lartg( abs( gamma ), one, beta( k+i ), alpha( k+i ),rwk ) if( alpha( k+i )>=beta( k+i ) ) then call stdlib${ii}$_${ci}$dscal( l-i+1, one / alpha( k+i ), a( k+i, n-l+i ),lda ) else call stdlib${ii}$_${ci}$dscal( l-i+1, one / beta( k+i ), b( i, n-l+i ),ldb ) call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if else alpha( k+i ) = zero beta( k+i ) = one call stdlib${ii}$_${ci}$copy( l-i+1, b( i, n-l+i ), ldb, a( k+i, n-l+i ),lda ) end if end do ! post-assignment do i = m + 1, k + l alpha( i ) = zero beta( i ) = one end do if( k+l<n ) then do i = k + l + 1, n alpha( i ) = zero beta( i ) = zero end do end if 100 continue ncycle = kcycle return end subroutine stdlib${ii}$_${ci}$tgsja #:endif #:endfor pure module subroutine stdlib${ii}$_cungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! CUNGBR generates one of the complex unitary matrices Q or P**H !! determined by CGEBRD when reducing a complex matrix A to bidiagonal !! form: A = Q * B * P**H. Q and P**H are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H !! is of order N: !! if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m !! rows of P**H, where n >= m >= k; !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments complex(sp), intent(inout) :: a(lda,*) complex(sp), intent(in) :: tau(*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_cungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_cunglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_cunglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = real( work( 1_${ik}$ ),KIND=sp) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_cgebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_cungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors cone ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = czero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, m a( i, 1_${ik}$ ) = czero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_cungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**h, determined by a call to stdlib${ii}$_cgebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_cunglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors cone ! row downward, and set the first row and column of p**h to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, n a( i, 1_${ik}$ ) = czero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = czero end do if( n>1_${ik}$ ) then ! form p**h(2:n,2:n) call stdlib${ii}$_cunglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_cungbr pure module subroutine stdlib${ii}$_zungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! ZUNGBR generates one of the complex unitary matrices Q or P**H !! determined by ZGEBRD when reducing a complex matrix A to bidiagonal !! form: A = Q * B * P**H. Q and P**H are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H !! is of order N: !! if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m !! rows of P**H, where n >= m >= k; !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments complex(dp), intent(inout) :: a(lda,*) complex(dp), intent(in) :: tau(*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_zungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_zunglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_zunglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = real( work( 1_${ik}$ ),KIND=dp) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_zgebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_zungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors cone ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = czero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, m a( i, 1_${ik}$ ) = czero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_zungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**h, determined by a call to stdlib${ii}$_zgebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_zunglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors cone ! row downward, and set the first row and column of p**h to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, n a( i, 1_${ik}$ ) = czero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = czero end do if( n>1_${ik}$ ) then ! form p**h(2:n,2:n) call stdlib${ii}$_zunglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_zungbr #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$ungbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! ZUNGBR: generates one of the complex unitary matrices Q or P**H !! determined by ZGEBRD when reducing a complex matrix A to bidiagonal !! form: A = Q * B * P**H. Q and P**H are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H !! is of order N: !! if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m !! rows of P**H, where n >= m >= k; !! if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*) complex(${ck}$), intent(in) :: tau(*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_${ci}$unglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_${ci}$unglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = real( work( 1_${ik}$ ),KIND=${ck}$) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_${ci}$gebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_${ci}$ungqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors cone ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = czero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, m a( i, 1_${ik}$ ) = czero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_${ci}$ungqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**h, determined by a call to stdlib${ii}$_${ci}$gebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_${ci}$unglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors cone ! row downward, and set the first row and column of p**h to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = cone do i = 2, n a( i, 1_${ik}$ ) = czero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = czero end do if( n>1_${ik}$ ) then ! form p**h(2:n,2:n) call stdlib${ii}$_${ci}$unglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_${ci}$ungbr #:endif #:endfor pure module subroutine stdlib${ii}$_sorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! SORGBR generates one of the real orthogonal matrices Q or P**T !! determined by SGEBRD when reducing a real matrix A to bidiagonal !! form: A = Q * B * P**T. Q and P**T are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T !! is of order N: !! if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m !! rows of P**T, where n >= m >= k; !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*) real(sp), intent(in) :: tau(*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_sorgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_sorglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_sorglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = work( 1_${ik}$ ) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SORGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_sgebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_sorgqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors one ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = zero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, m a( i, 1_${ik}$ ) = zero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_sorgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**t, determined by a call to stdlib${ii}$_sgebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_sorglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors one ! row downward, and set the first row and column of p**t to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, n a( i, 1_${ik}$ ) = zero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = zero end do if( n>1_${ik}$ ) then ! form p**t(2:n,2:n) call stdlib${ii}$_sorglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_sorgbr pure module subroutine stdlib${ii}$_dorgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! DORGBR generates one of the real orthogonal matrices Q or P**T !! determined by DGEBRD when reducing a real matrix A to bidiagonal !! form: A = Q * B * P**T. Q and P**T are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T !! is of order N: !! if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m !! rows of P**T, where n >= m >= k; !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*) real(dp), intent(in) :: tau(*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_dorgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_dorglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_dorglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = work( 1_${ik}$ ) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DORGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_dgebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_dorgqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors one ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = zero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, m a( i, 1_${ik}$ ) = zero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_dorgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**t, determined by a call to stdlib${ii}$_dgebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_dorglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors one ! row downward, and set the first row and column of p**t to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, n a( i, 1_${ik}$ ) = zero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = zero end do if( n>1_${ik}$ ) then ! form p**t(2:n,2:n) call stdlib${ii}$_dorglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_dorgbr #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$orgbr( vect, m, n, k, a, lda, tau, work, lwork, info ) !! DORGBR: generates one of the real orthogonal matrices Q or P**T !! determined by DGEBRD when reducing a real matrix A to bidiagonal !! form: A = Q * B * P**T. Q and P**T are defined as products of !! elementary reflectors H(i) or G(i) respectively. !! If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q !! is of order M: !! if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n !! columns of Q, where m >= n >= k; !! if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an !! M-by-M matrix. !! If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T !! is of order N: !! if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m !! rows of P**T, where n >= m >= k; !! if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as !! an N-by-N matrix. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*) real(${rk}$), intent(in) :: tau(*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: lquery, wantq integer(${ik}$) :: i, iinfo, j, lwkopt, mn ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ wantq = stdlib_lsame( vect, 'Q' ) mn = min( m, n ) lquery = ( lwork==-1_${ik}$ ) if( .not.wantq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( m<0_${ik}$ ) then info = -2_${ik}$ else if( n<0_${ik}$ .or. ( wantq .and. ( n>m .or. n<min( m,k ) ) ) .or. ( .not.wantq .and. ( & m>n .or. m<min( n, k ) ) ) ) then info = -3_${ik}$ else if( k<0_${ik}$ ) then info = -4_${ik}$ else if( lda<max( 1_${ik}$, m ) ) then info = -6_${ik}$ else if( lwork<max( 1_${ik}$, mn ) .and. .not.lquery ) then info = -9_${ik}$ end if if( info==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ if( wantq ) then if( m>=k ) then call stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( m>1_${ik}$ ) then call stdlib${ii}$_${ri}$orgqr( m-1, m-1, m-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if else if( k<n ) then call stdlib${ii}$_${ri}$orglq( m, n, k, a, lda, tau, work, -1_${ik}$, iinfo ) else if( n>1_${ik}$ ) then call stdlib${ii}$_${ri}$orglq( n-1, n-1, n-1, a, lda, tau, work, -1_${ik}$,iinfo ) end if end if end if lwkopt = work( 1_${ik}$ ) lwkopt = max (lwkopt, mn) end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DORGBR', -info ) return else if( lquery ) then work( 1_${ik}$ ) = lwkopt return end if ! quick return if possible if( m==0_${ik}$ .or. n==0_${ik}$ ) then work( 1_${ik}$ ) = 1_${ik}$ return end if if( wantq ) then ! form q, determined by a call to stdlib${ii}$_${ri}$gebrd to reduce an m-by-k ! matrix if( m>=k ) then ! if m >= k, assume m >= n >= k call stdlib${ii}$_${ri}$orgqr( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if m < k, assume m = n ! shift the vectors which define the elementary reflectors one ! column to the right, and set the first row and column of q ! to those of the unit matrix do j = m, 2, -1 a( 1_${ik}$, j ) = zero do i = j + 1, m a( i, j ) = a( i, j-1 ) end do end do a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, m a( i, 1_${ik}$ ) = zero end do if( m>1_${ik}$ ) then ! form q(2:m,2:m) call stdlib${ii}$_${ri}$orgqr( m-1, m-1, m-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if else ! form p**t, determined by a call to stdlib${ii}$_${ri}$gebrd to reduce a k-by-n ! matrix if( k<n ) then ! if k < n, assume k <= m <= n call stdlib${ii}$_${ri}$orglq( m, n, k, a, lda, tau, work, lwork, iinfo ) else ! if k >= n, assume m = n ! shift the vectors which define the elementary reflectors one ! row downward, and set the first row and column of p**t to ! those of the unit matrix a( 1_${ik}$, 1_${ik}$ ) = one do i = 2, n a( i, 1_${ik}$ ) = zero end do do j = 2, n do i = j - 1, 2, -1 a( i, j ) = a( i-1, j ) end do a( 1_${ik}$, j ) = zero end do if( n>1_${ik}$ ) then ! form p**t(2:n,2:n) call stdlib${ii}$_${ri}$orglq( n-1, n-1, n-1, a( 2_${ik}$, 2_${ik}$ ), lda, tau, work,lwork, iinfo ) end if end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_${ri}$orgbr #:endif #:endfor pure module subroutine stdlib${ii}$_cunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', CUNMBR: overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'C': Q**H * C C * Q**H !! If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'C': P**H * C C * P**H !! Here Q and P**H are the unitary matrices determined by CGEBRD when !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q !! and P**H are defined as products of elementary reflectors H(i) and !! G(i) respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the unitary matrix Q or P**H that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments complex(sp), intent(inout) :: a(lda,*), c(ldc,*) complex(sp), intent(in) :: tau(*) complex(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( m>0_${ik}$ .and. n>0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'CUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb else lwkopt = 1_${ik}$ end if work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNMBR', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_cgebrd with nq >= k call stdlib${ii}$_cunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_cgebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_cunmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'C' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_cgebrd with nq > k call stdlib${ii}$_cunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_cgebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_cunmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_cunmbr pure module subroutine stdlib${ii}$_zunmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'C': Q**H * C C * Q**H !! If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'C': P**H * C C * P**H !! Here Q and P**H are the unitary matrices determined by ZGEBRD when !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q !! and P**H are defined as products of elementary reflectors H(i) and !! G(i) respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the unitary matrix Q or P**H that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments complex(dp), intent(inout) :: a(lda,*), c(ldc,*) complex(dp), intent(in) :: tau(*) complex(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( m>0_${ik}$ .and. n>0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb else lwkopt = 1_${ik}$ end if work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNMBR', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_zgebrd with nq >= k call stdlib${ii}$_zunmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_zgebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_zunmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'C' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_zgebrd with nq > k call stdlib${ii}$_zunmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_zgebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_zunmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_zunmbr #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$unmbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', ZUNMBR: overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'C': Q**H * C C * Q**H !! If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'C': P**H * C C * P**H !! Here Q and P**H are the unitary matrices determined by ZGEBRD when !! reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q !! and P**H are defined as products of elementary reflectors H(i) and !! G(i) respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the unitary matrix Q or P**H that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments complex(${ck}$), intent(inout) :: a(lda,*), c(ldc,*) complex(${ck}$), intent(in) :: tau(*) complex(${ck}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( m>0_${ik}$ .and. n>0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'ZUNMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb else lwkopt = 1_${ik}$ end if work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNMBR', -info ) return else if( lquery ) then return end if ! quick return if possible if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_${ci}$gebrd with nq >= k call stdlib${ii}$_${ci}$unmqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_${ci}$gebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_${ci}$unmqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'C' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_${ci}$gebrd with nq > k call stdlib${ii}$_${ci}$unmlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_${ci}$gebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_${ci}$unmlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_${ci}$unmbr #:endif #:endfor pure module subroutine stdlib${ii}$_sormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', SORMBR: overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'T': Q**T * C C * Q**T !! If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'T': P**T * C C * P**T !! Here Q and P**T are the orthogonal matrices determined by SGEBRD when !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and !! P**T are defined as products of elementary reflectors H(i) and G(i) !! respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the orthogonal matrix Q or P**T that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments real(sp), intent(inout) :: a(lda,*), c(ldc,*) real(sp), intent(in) :: tau(*) real(sp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'SORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SORMBR', -info ) return else if( lquery ) then return end if ! quick return if possible work( 1_${ik}$ ) = 1_${ik}$ if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_sgebrd with nq >= k call stdlib${ii}$_sormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_sgebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_sormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'T' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_sgebrd with nq > k call stdlib${ii}$_sormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_sgebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_sormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_sormbr pure module subroutine stdlib${ii}$_dormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', DORMBR: overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'T': Q**T * C C * Q**T !! If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'T': P**T * C C * P**T !! Here Q and P**T are the orthogonal matrices determined by DGEBRD when !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and !! P**T are defined as products of elementary reflectors H(i) and G(i) !! respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the orthogonal matrix Q or P**T that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments real(dp), intent(inout) :: a(lda,*), c(ldc,*) real(dp), intent(in) :: tau(*) real(dp), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DORMBR', -info ) return else if( lquery ) then return end if ! quick return if possible work( 1_${ik}$ ) = 1_${ik}$ if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_dgebrd with nq >= k call stdlib${ii}$_dormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_dgebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_dormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'T' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_dgebrd with nq > k call stdlib${ii}$_dormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_dgebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_dormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_dormbr #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$ormbr( vect, side, trans, m, n, k, a, lda, tau, c,ldc, work, lwork, & !! If VECT = 'Q', DORMBR: overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': Q * C C * Q !! TRANS = 'T': Q**T * C C * Q**T !! If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C !! with !! SIDE = 'L' SIDE = 'R' !! TRANS = 'N': P * C C * P !! TRANS = 'T': P**T * C C * P**T !! Here Q and P**T are the orthogonal matrices determined by DGEBRD when !! reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and !! P**T are defined as products of elementary reflectors H(i) and G(i) !! respectively. !! Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the !! order of the orthogonal matrix Q or P**T that is applied. !! If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: !! if nq >= k, Q = H(1) H(2) . . . H(k); !! if nq < k, Q = H(1) H(2) . . . H(nq-1). !! If VECT = 'P', A is assumed to have been a K-by-NQ matrix: !! if k < nq, P = G(1) G(2) . . . G(k); !! if k >= nq, P = G(1) G(2) . . . G(nq-1). info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: side, trans, vect integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: k, lda, ldc, lwork, m, n ! Array Arguments real(${rk}$), intent(inout) :: a(lda,*), c(ldc,*) real(${rk}$), intent(in) :: tau(*) real(${rk}$), intent(out) :: work(*) ! ===================================================================== ! Local Scalars logical(lk) :: applyq, left, lquery, notran character :: transt integer(${ik}$) :: i1, i2, iinfo, lwkopt, mi, nb, ni, nq, nw ! Intrinsic Functions ! Executable Statements ! test the input arguments info = 0_${ik}$ applyq = stdlib_lsame( vect, 'Q' ) left = stdlib_lsame( side, 'L' ) notran = stdlib_lsame( trans, 'N' ) lquery = ( lwork==-1_${ik}$ ) ! nq is the order of q or p and nw is the minimum dimension of work if( left ) then nq = m nw = max( 1_${ik}$, n ) else nq = n nw = max( 1_${ik}$, m ) end if if( .not.applyq .and. .not.stdlib_lsame( vect, 'P' ) ) then info = -1_${ik}$ else if( .not.left .and. .not.stdlib_lsame( side, 'R' ) ) then info = -2_${ik}$ else if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then info = -3_${ik}$ else if( m<0_${ik}$ ) then info = -4_${ik}$ else if( n<0_${ik}$ ) then info = -5_${ik}$ else if( k<0_${ik}$ ) then info = -6_${ik}$ else if( ( applyq .and. lda<max( 1_${ik}$, nq ) ) .or.( .not.applyq .and. lda<max( 1_${ik}$, min( nq,& k ) ) ) )then info = -8_${ik}$ else if( ldc<max( 1_${ik}$, m ) ) then info = -11_${ik}$ else if( lwork<nw .and. .not.lquery ) then info = -13_${ik}$ end if if( info==0_${ik}$ ) then if( applyq ) then if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMQR', side // trans, m, n-1, n-1,-1_${ik}$ ) end if else if( left ) then nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m-1, n, m-1,-1_${ik}$ ) else nb = stdlib${ii}$_ilaenv( 1_${ik}$, 'DORMLQ', side // trans, m, n-1, n-1,-1_${ik}$ ) end if end if lwkopt = nw*nb work( 1_${ik}$ ) = lwkopt end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DORMBR', -info ) return else if( lquery ) then return end if ! quick return if possible work( 1_${ik}$ ) = 1_${ik}$ if( m==0 .or. n==0 )return if( applyq ) then ! apply q if( nq>=k ) then ! q was determined by a call to stdlib${ii}$_${ri}$gebrd with nq >= k call stdlib${ii}$_${ri}$ormqr( side, trans, m, n, k, a, lda, tau, c, ldc,work, lwork, iinfo & ) else if( nq>1_${ik}$ ) then ! q was determined by a call to stdlib${ii}$_${ri}$gebrd with nq < k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_${ri}$ormqr( side, trans, mi, ni, nq-1, a( 2_${ik}$, 1_${ik}$ ), lda, tau,c( i1, i2 ), & ldc, work, lwork, iinfo ) end if else ! apply p if( notran ) then transt = 'T' else transt = 'N' end if if( nq>k ) then ! p was determined by a call to stdlib${ii}$_${ri}$gebrd with nq > k call stdlib${ii}$_${ri}$ormlq( side, transt, m, n, k, a, lda, tau, c, ldc,work, lwork, & iinfo ) else if( nq>1_${ik}$ ) then ! p was determined by a call to stdlib${ii}$_${ri}$gebrd with nq <= k if( left ) then mi = m - 1_${ik}$ ni = n i1 = 2_${ik}$ i2 = 1_${ik}$ else mi = m ni = n - 1_${ik}$ i1 = 1_${ik}$ i2 = 2_${ik}$ end if call stdlib${ii}$_${ri}$ormlq( side, transt, mi, ni, nq-1, a( 1_${ik}$, 2_${ik}$ ), lda,tau, c( i1, i2 ), & ldc, work, lwork, iinfo ) end if end if work( 1_${ik}$ ) = lwkopt return end subroutine stdlib${ii}$_${ri}$ormbr #:endif #:endfor #:endfor end submodule stdlib_lapack_svd_comp