#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_gen3 implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES module subroutine stdlib${ii}$_slaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) !! SLAQTR solves the real quasi-triangular system !! op(T)*p = scale*c, if LREAL = .TRUE. !! or the complex quasi-triangular systems !! op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. !! in real arithmetic, where T is upper quasi-triangular. !! If LREAL = .FALSE., then the first diagonal block of T must be !! 1 by 1, B is the specially structured matrix !! B = [ b(1) b(2) ... b(n) ] !! [ w ] !! [ w ] !! [ . ] !! [ w ] !! op(A) = A or A**T, A**T denotes the transpose of !! matrix A. !! On input, X = [ c ]. On output, X = [ p ]. !! [ d ] [ q ] !! This subroutine is designed for the condition number estimation !! in routine STRSNA. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments logical(lk), intent(in) :: lreal, ltran integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldt, n real(sp), intent(out) :: scale real(sp), intent(in) :: w ! Array Arguments real(sp), intent(in) :: b(*), t(ldt,*) real(sp), intent(out) :: work(*) real(sp), intent(inout) :: x(*) ! ===================================================================== ! Local Scalars logical(lk) :: notran integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2 real(sp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & xnorm, z ! Local Arrays real(sp) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$) ! Intrinsic Functions ! Executable Statements ! do not test the input parameters for errors notran = .not.ltran info = 0_${ik}$ ! quick return if possible if( n==0 )return ! set constants to control overflow eps = stdlib${ii}$_slamch( 'P' ) smlnum = stdlib${ii}$_slamch( 'S' ) / eps bignum = one / smlnum xnorm = stdlib${ii}$_slange( 'M', n, n, t, ldt, d ) if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_slange( 'M', n, 1_${ik}$, b, n, d ) ) smin = max( smlnum, eps*xnorm ) ! compute 1-norm of each column of strictly upper triangular ! part of t to control overflow in triangular solver. work( 1_${ik}$ ) = zero do j = 2, n work( j ) = stdlib${ii}$_sasum( j-1, t( 1_${ik}$, j ), 1_${ik}$ ) end do if( .not.lreal ) then do i = 2, n work( i ) = work( i ) + abs( b( i ) ) end do end if n2 = 2_${ik}$*n n1 = n if( .not.lreal )n1 = n2 k = stdlib${ii}$_isamax( n1, x, 1_${ik}$ ) xmax = abs( x( k ) ) scale = one if( xmax>bignum ) then scale = bignum / xmax call stdlib${ii}$_sscal( n1, scale, x, 1_${ik}$ ) xmax = bignum end if if( lreal ) then if( notran ) then ! solve t*p = scale*c jnext = n loop_30: do j = n, 1, -1 if( j>jnext )cycle loop_30 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! meet 1 by 1 diagonal block ! scale to avoid overflow when computing ! x(j) = b(j)/t(j,j) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( xj==zero )cycle loop_30 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xj = abs( x( j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_isamax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if else ! meet 2 by 2 diagonal block ! call 2 by 2 linear system solve, to take ! care of possible overflow by scaling factor. d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) call stdlib${ii}$_slaln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_sscal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) ! to avoid overflow in updating right-hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update right-hand side if( j1>1_${ik}$ ) then call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_saxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_isamax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if end if end do loop_30 else ! solve t**t*p = scale*c jnext = 1_${ik}$ loop_40: do j = 1, n if( j<jnext )cycle loop_40 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xmax = max( xmax, abs( x( j1 ) ) ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side elements by inner product. xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) call stdlib${ii}$_slaln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, & 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_sscal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) end if end do loop_40 end if else sminw = max( eps*abs( w ), smin ) if( notran ) then ! solve (t + ib)*(p+iq) = c+id jnext = n loop_70: do j = n, 1, -1 if( j>jnext )cycle loop_70 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in division z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( xj==zero )cycle loop_70 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_sladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) x( j1 ) = sr x( n+j1 ) = si xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_saxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) xmax = zero do k = 1, j1 - 1 xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) end do end if else ! meet 2 by 2 diagonal block d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) call stdlib${ii}$_slaln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, & d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_sscal( 2_${ik}$*n, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) ! scale x(j1), .... to avoid overflow in ! updating right hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )& ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update the right-hand side. if( j1>1_${ik}$ ) then call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_saxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_saxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) call stdlib${ii}$_saxpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) xmax = zero do k = 1, j1 - 1 xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) end do end if end if end do loop_70 else ! solve (t + ib)**t*(p+iq) = c+id jnext = 1_${ik}$ loop_80: do j = 1, n if( j<jnext )cycle loop_80 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) + abs( x( j1+n ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) if( j1>1_${ik}$ ) then x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ ) end if xj = abs( x( j1 ) ) + abs( x( j1+n ) ) z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) ! scale if necessary to avoid overflow in ! complex division tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_sladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) x( j1 ) = sr x( j1+n ) = si xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 ) d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 ) d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ ) call stdlib${ii}$_slaln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_sscal( n2, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& , xmax ) end if end do loop_80 end if end if return end subroutine stdlib${ii}$_slaqtr module subroutine stdlib${ii}$_dlaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) !! DLAQTR solves the real quasi-triangular system !! op(T)*p = scale*c, if LREAL = .TRUE. !! or the complex quasi-triangular systems !! op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. !! in real arithmetic, where T is upper quasi-triangular. !! If LREAL = .FALSE., then the first diagonal block of T must be !! 1 by 1, B is the specially structured matrix !! B = [ b(1) b(2) ... b(n) ] !! [ w ] !! [ w ] !! [ . ] !! [ w ] !! op(A) = A or A**T, A**T denotes the transpose of !! matrix A. !! On input, X = [ c ]. On output, X = [ p ]. !! [ d ] [ q ] !! This subroutine is designed for the condition number estimation !! in routine DTRSNA. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments logical(lk), intent(in) :: lreal, ltran integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldt, n real(dp), intent(out) :: scale real(dp), intent(in) :: w ! Array Arguments real(dp), intent(in) :: b(*), t(ldt,*) real(dp), intent(out) :: work(*) real(dp), intent(inout) :: x(*) ! ===================================================================== ! Local Scalars logical(lk) :: notran integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2 real(dp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & xnorm, z ! Local Arrays real(dp) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$) ! Intrinsic Functions ! Executable Statements ! do not test the input parameters for errors notran = .not.ltran info = 0_${ik}$ ! quick return if possible if( n==0 )return ! set constants to control overflow eps = stdlib${ii}$_dlamch( 'P' ) smlnum = stdlib${ii}$_dlamch( 'S' ) / eps bignum = one / smlnum xnorm = stdlib${ii}$_dlange( 'M', n, n, t, ldt, d ) if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_dlange( 'M', n, 1_${ik}$, b, n, d ) ) smin = max( smlnum, eps*xnorm ) ! compute 1-norm of each column of strictly upper triangular ! part of t to control overflow in triangular solver. work( 1_${ik}$ ) = zero do j = 2, n work( j ) = stdlib${ii}$_dasum( j-1, t( 1_${ik}$, j ), 1_${ik}$ ) end do if( .not.lreal ) then do i = 2, n work( i ) = work( i ) + abs( b( i ) ) end do end if n2 = 2_${ik}$*n n1 = n if( .not.lreal )n1 = n2 k = stdlib${ii}$_idamax( n1, x, 1_${ik}$ ) xmax = abs( x( k ) ) scale = one if( xmax>bignum ) then scale = bignum / xmax call stdlib${ii}$_dscal( n1, scale, x, 1_${ik}$ ) xmax = bignum end if if( lreal ) then if( notran ) then ! solve t*p = scale*c jnext = n loop_30: do j = n, 1, -1 if( j>jnext )cycle loop_30 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! meet 1 by 1 diagonal block ! scale to avoid overflow when computing ! x(j) = b(j)/t(j,j) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( xj==zero )cycle loop_30 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xj = abs( x( j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_idamax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if else ! meet 2 by 2 diagonal block ! call 2 by 2 linear system solve, to take ! care of possible overflow by scaling factor. d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) call stdlib${ii}$_dlaln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_dscal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) ! to avoid overflow in updating right-hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update right-hand side if( j1>1_${ik}$ ) then call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_daxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_idamax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if end if end do loop_30 else ! solve t**t*p = scale*c jnext = 1_${ik}$ loop_40: do j = 1, n if( j<jnext )cycle loop_40 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xmax = max( xmax, abs( x( j1 ) ) ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side elements by inner product. xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) call stdlib${ii}$_dlaln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, & 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_dscal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) end if end do loop_40 end if else sminw = max( eps*abs( w ), smin ) if( notran ) then ! solve (t + ib)*(p+iq) = c+id jnext = n loop_70: do j = n, 1, -1 if( j>jnext )cycle loop_70 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in division z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( xj==zero )cycle loop_70 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_dladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) x( j1 ) = sr x( n+j1 ) = si xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_daxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) xmax = zero do k = 1, j1 - 1 xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) end do end if else ! meet 2 by 2 diagonal block d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) call stdlib${ii}$_dlaln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, & d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_dscal( 2_${ik}$*n, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) ! scale x(j1), .... to avoid overflow in ! updating right hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )& ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update the right-hand side. if( j1>1_${ik}$ ) then call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_daxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_daxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) call stdlib${ii}$_daxpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) xmax = zero do k = 1, j1 - 1 xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) end do end if end if end do loop_70 else ! solve (t + ib)**t*(p+iq) = c+id jnext = 1_${ik}$ loop_80: do j = 1, n if( j<jnext )cycle loop_80 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) + abs( x( j1+n ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) if( j1>1_${ik}$ ) then x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ ) end if xj = abs( x( j1 ) ) + abs( x( j1+n ) ) z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) ! scale if necessary to avoid overflow in ! complex division tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_dladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) x( j1 ) = sr x( j1+n ) = si xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 ) d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 ) d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ ) call stdlib${ii}$_dlaln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_dscal( n2, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& , xmax ) end if end do loop_80 end if end if return end subroutine stdlib${ii}$_dlaqtr #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$laqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info ) !! DLAQTR: solves the real quasi-triangular system !! op(T)*p = scale*c, if LREAL = .TRUE. !! or the complex quasi-triangular systems !! op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. !! in real arithmetic, where T is upper quasi-triangular. !! If LREAL = .FALSE., then the first diagonal block of T must be !! 1 by 1, B is the specially structured matrix !! B = [ b(1) b(2) ... b(n) ] !! [ w ] !! [ w ] !! [ . ] !! [ w ] !! op(A) = A or A**T, A**T denotes the transpose of !! matrix A. !! On input, X = [ c ]. On output, X = [ p ]. !! [ d ] [ q ] !! This subroutine is designed for the condition number estimation !! in routine DTRSNA. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments logical(lk), intent(in) :: lreal, ltran integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldt, n real(${rk}$), intent(out) :: scale real(${rk}$), intent(in) :: w ! Array Arguments real(${rk}$), intent(in) :: b(*), t(ldt,*) real(${rk}$), intent(out) :: work(*) real(${rk}$), intent(inout) :: x(*) ! ===================================================================== ! Local Scalars logical(lk) :: notran integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2 real(${rk}$) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, & xnorm, z ! Local Arrays real(${rk}$) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$) ! Intrinsic Functions ! Executable Statements ! do not test the input parameters for errors notran = .not.ltran info = 0_${ik}$ ! quick return if possible if( n==0 )return ! set constants to control overflow eps = stdlib${ii}$_${ri}$lamch( 'P' ) smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / eps bignum = one / smlnum xnorm = stdlib${ii}$_${ri}$lange( 'M', n, n, t, ldt, d ) if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_${ri}$lange( 'M', n, 1_${ik}$, b, n, d ) ) smin = max( smlnum, eps*xnorm ) ! compute 1-norm of each column of strictly upper triangular ! part of t to control overflow in triangular solver. work( 1_${ik}$ ) = zero do j = 2, n work( j ) = stdlib${ii}$_${ri}$asum( j-1, t( 1_${ik}$, j ), 1_${ik}$ ) end do if( .not.lreal ) then do i = 2, n work( i ) = work( i ) + abs( b( i ) ) end do end if n2 = 2_${ik}$*n n1 = n if( .not.lreal )n1 = n2 k = stdlib${ii}$_i${ri}$amax( n1, x, 1_${ik}$ ) xmax = abs( x( k ) ) scale = one if( xmax>bignum ) then scale = bignum / xmax call stdlib${ii}$_${ri}$scal( n1, scale, x, 1_${ik}$ ) xmax = bignum end if if( lreal ) then if( notran ) then ! solve t*p = scale*c jnext = n loop_30: do j = n, 1, -1 if( j>jnext )cycle loop_30 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! meet 1 by 1 diagonal block ! scale to avoid overflow when computing ! x(j) = b(j)/t(j,j) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( xj==zero )cycle loop_30 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xj = abs( x( j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_i${ri}$amax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if else ! meet 2 by 2 diagonal block ! call 2 by 2 linear system solve, to take ! care of possible overflow by scaling factor. d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) call stdlib${ii}$_${ri}$laln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_${ri}$scal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2)) ! to avoid overflow in updating right-hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update right-hand side if( j1>1_${ik}$ ) then call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) k = stdlib${ii}$_i${ri}$amax( j1-1, x, 1_${ik}$ ) xmax = abs( x( k ) ) end if end if end do loop_30 else ! solve t**t*p = scale*c jnext = 1_${ik}$ loop_40: do j = 1, n if( j<jnext )cycle loop_40 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) xj = abs( x( j1 ) ) tjj = abs( t( j1, j1 ) ) tmp = t( j1, j1 ) if( tjj<smin ) then tmp = smin tjj = smin info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) / tmp xmax = max( xmax, abs( x( j1 ) ) ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side elements by inner product. xj = max( abs( x( j1 ) ), abs( x( j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) call stdlib${ii}$_${ri}$laln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, & 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_${ri}$scal( n, scaloc, x, 1_${ik}$ ) scale = scale*scaloc end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax ) end if end do loop_40 end if else sminw = max( eps*abs( w ), smin ) if( notran ) then ! solve (t + ib)*(p+iq) = c+id jnext = n loop_70: do j = n, 1, -1 if( j>jnext )cycle loop_70 j1 = j j2 = j jnext = j - 1_${ik}$ if( j>1_${ik}$ ) then if( t( j, j-1 )/=zero ) then j1 = j - 1_${ik}$ jnext = j - 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in division z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( xj==zero )cycle loop_70 if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_${ri}$ladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si ) x( j1 ) = sr x( n+j1 ) = si xj = abs( x( j1 ) ) + abs( x( n+j1 ) ) ! scale x if necessary to avoid overflow when adding a ! multiple of column j1 of t. if( xj>one ) then rec = one / xj if( work( j1 )>( bignum-xmax )*rec ) then call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if if( j1>1_${ik}$ ) then call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) xmax = zero do k = 1, j1 - 1 xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) ) end do end if else ! meet 2 by 2 diagonal block d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) call stdlib${ii}$_${ri}$laln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, & d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_${ri}$scal( 2_${ik}$*n, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) ! scale x(j1), .... to avoid overflow in ! updating right hand side. xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )& ) ) if( xj>one ) then rec = one / xj if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec end if end if ! update the right-hand side. if( j1>1_${ik}$ ) then call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 ) x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 ) xmax = zero do k = 1, j1 - 1 xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax ) end do end if end if end do loop_70 else ! solve (t + ib)**t*(p+iq) = c+id jnext = 1_${ik}$ loop_80: do j = 1, n if( j<jnext )cycle loop_80 j1 = j j2 = j jnext = j + 1_${ik}$ if( j<n ) then if( t( j+1, j )/=zero ) then j2 = j + 1_${ik}$ jnext = j + 2_${ik}$ end if end if if( j1==j2 ) then ! 1 by 1 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = abs( x( j1 ) ) + abs( x( j1+n ) ) if( xmax>one ) then rec = one / xmax if( work( j1 )>( bignum-xj )*rec ) then call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if x( j1 ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ ) x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) if( j1>1_${ik}$ ) then x( j1 ) = x( j1 ) - b( j1 )*x( n+1 ) x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ ) end if xj = abs( x( j1 ) ) + abs( x( j1+n ) ) z = w if( j1==1_${ik}$ )z = b( 1_${ik}$ ) ! scale if necessary to avoid overflow in ! complex division tjj = abs( t( j1, j1 ) ) + abs( z ) tmp = t( j1, j1 ) if( tjj<sminw ) then tmp = sminw tjj = sminw info = 1_${ik}$ end if if( tjj<one ) then if( xj>bignum*tjj ) then rec = one / xj call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if call stdlib${ii}$_${ri}$ladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si ) x( j1 ) = sr x( j1+n ) = si xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax ) else ! 2 by 2 diagonal block ! scale if necessary to avoid overflow in forming the ! right-hand side element by inner product. xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) ) if( xmax>one ) then rec = one / xmax if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ ) scale = scale*rec xmax = xmax*rec end if end if d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ ) d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ ) d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ ) d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 ) d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 ) d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ ) d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ ) call stdlib${ii}$_${ri}$laln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,& 2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr ) if( ierr/=0_${ik}$ )info = 2_${ik}$ if( scaloc/=one ) then call stdlib${ii}$_${ri}$scal( n2, scaloc, x, 1_${ik}$ ) scale = scaloc*scale end if x( j1 ) = v( 1_${ik}$, 1_${ik}$ ) x( j2 ) = v( 2_${ik}$, 1_${ik}$ ) x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ ) x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ ) xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )& , xmax ) end if end do loop_80 end if end if return end subroutine stdlib${ii}$_${ri}$laqtr #:endif #:endfor pure module subroutine stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & !! SLAHQR is an auxiliary routine called by SHSEQR to update the !! eigenvalues and Schur decomposition already computed by SHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), z(ldz,*) real(sp), intent(out) :: wi(*), wr(*) ! ========================================================= ! Parameters real(sp), parameter :: dat1 = 3.0_sp/4.0_sp real(sp), parameter :: dat2 = -0.4375_sp integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars real(sp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl ! Local Arrays real(sp) :: v(3_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then wr( ilo ) = h( ilo, ilo ) wi( ilo ) = zero return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = zero h( j+3, j ) = zero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=sp) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1 or 2. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 20 continue l = ilo if( i<ilo )go to 160 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 or 2 splits off at the bottom because a ! subdiagonal element has become negligible. loop_140: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( abs( h( k, k-1 ) )<=smlnum )go to 40 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) ) if( tst==zero ) then if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) ) if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some cases. ==== if( abs( h( k, k-1 ) )<=ulp*tst ) then ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 end if end do 40 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = zero end if ! exit from loop if a submatrix of order 1 or 2 has split off. if( l>=i-1 )go to 150 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) h11 = dat1*s + h( i, i ) h12 = dat2*s h21 = s h22 = h11 else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) h11 = dat1*s + h( l, l ) h12 = dat2*s h21 = s h22 = h11 else ! prepare to use francis' double shift ! (i.e. 2nd degree generalized rayleigh quotient) h11 = h( i-1, i-1 ) h21 = h( i, i-1 ) h12 = h( i-1, i ) h22 = h( i, i ) end if s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) if( s==zero ) then rt1r = zero rt1i = zero rt2r = zero rt2i = zero else h11 = h11 / s h21 = h21 / s h12 = h12 / s h22 = h22 / s tr = ( h11+h22 ) / two det = ( h11-tr )*( h22-tr ) - h12*h21 rtdisc = sqrt( abs( det ) ) if( det>=zero ) then ! ==== complex conjugate shifts ==== rt1r = tr*s rt2r = rt1r rt1i = rtdisc*s rt2i = -rt1i else ! ==== realshifts (use only one of them,KIND=sp) ==== rt1r = tr + rtdisc rt2r = tr - rtdisc if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then rt1r = rt1r*s rt2r = rt1r else rt2r = rt2r*s rt1r = rt2r end if rt1i = zero rt2i = zero end if end if ! look for two consecutive small subdiagonal elements. do m = i - 2, l, -1 ! determine the effect of starting the double-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. (the following uses scaling to avoid ! overflows and most underflows.) h21s = h( m+1, m ) s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) h21s = h( m+1, m ) / s v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & rt1i*( rt2i / s ) v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) v( 3_${ik}$ ) = h21s*h( m+2, m+1 ) s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) ) v( 1_${ik}$ ) = v( 1_${ik}$ ) / s v( 2_${ik}$ ) = v( 2_${ik}$ ) / s v( 3_${ik}$ ) = v( 3_${ik}$ ) / s if( m==l )go to 60 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( & h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 end do 60 continue ! double-shift qr step loop_130: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge one step toward the bottom of the active ! submatrix. nr is the order of g. nr = min( 3_${ik}$, i-k+1 ) if( k>m )call stdlib${ii}$_scopy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_slarfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = zero if( k<i-1 )h( k+2, k-1 ) = zero else if( m>l ) then ! ==== use the following instead of ! . h( k, k-1 ) = -h( k, k-1 ) to ! . avoid a bug when v(2) and v(3) ! . underflow. ==== h( k, k-1 ) = h( k, k-1 )*( one-t1 ) end if v2 = v( 2_${ik}$ ) t2 = t1*v2 if( nr==3_${ik}$ ) then v3 = v( 3_${ik}$ ) t3 = t1*v3 ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 h( k+2, j ) = h( k+2, j ) - sum*t3 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, min( k+3, i ) sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 h( j, k+2 ) = h( j, k+2 ) - sum*t3 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 z( j, k+2 ) = z( j, k+2 ) - sum*t3 end do end if else if( nr==2_${ik}$ ) then ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, i sum = h( j, k ) + v2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 end do end if end if end do loop_130 end do loop_140 ! failure to converge in remaining number of iterations info = i return 150 continue if( l==i ) then ! h(i,i-1) is negligible: one eigenvalue has converged. wr( i ) = h( i, i ) wi( i ) = zero else if( l==i-1 ) then ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. ! transform the 2-by-2 submatrix to standard schur form, ! and compute and store the eigenvalues. call stdlib${ii}$_slanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & wi( i-1 ), wr( i ), wi( i ),cs, sn ) if( wantt ) then ! apply the transformation to the rest of h. if( i2>i )call stdlib${ii}$_srot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) call stdlib${ii}$_srot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn ) end if if( wantz ) then ! apply the transformation to z. call stdlib${ii}$_srot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn ) end if end if ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 20 160 continue return end subroutine stdlib${ii}$_slahqr pure module subroutine stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & !! DLAHQR is an auxiliary routine called by DHSEQR to update the !! eigenvalues and Schur decomposition already computed by DHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), z(ldz,*) real(dp), intent(out) :: wi(*), wr(*) ! ========================================================= ! Parameters real(dp), parameter :: dat1 = 3.0_dp/4.0_dp real(dp), parameter :: dat2 = -0.4375_dp integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars real(dp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl ! Local Arrays real(dp) :: v(3_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then wr( ilo ) = h( ilo, ilo ) wi( ilo ) = zero return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = zero h( j+3, j ) = zero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=dp) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1 or 2. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 20 continue l = ilo if( i<ilo )go to 160 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 or 2 splits off at the bottom because a ! subdiagonal element has become negligible. loop_140: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( abs( h( k, k-1 ) )<=smlnum )go to 40 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) ) if( tst==zero ) then if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) ) if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some cases. ==== if( abs( h( k, k-1 ) )<=ulp*tst ) then ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 end if end do 40 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = zero end if ! exit from loop if a submatrix of order 1 or 2 has split off. if( l>=i-1 )go to 150 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) h11 = dat1*s + h( i, i ) h12 = dat2*s h21 = s h22 = h11 else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) h11 = dat1*s + h( l, l ) h12 = dat2*s h21 = s h22 = h11 else ! prepare to use francis' double shift ! (i.e. 2nd degree generalized rayleigh quotient) h11 = h( i-1, i-1 ) h21 = h( i, i-1 ) h12 = h( i-1, i ) h22 = h( i, i ) end if s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) if( s==zero ) then rt1r = zero rt1i = zero rt2r = zero rt2i = zero else h11 = h11 / s h21 = h21 / s h12 = h12 / s h22 = h22 / s tr = ( h11+h22 ) / two det = ( h11-tr )*( h22-tr ) - h12*h21 rtdisc = sqrt( abs( det ) ) if( det>=zero ) then ! ==== complex conjugate shifts ==== rt1r = tr*s rt2r = rt1r rt1i = rtdisc*s rt2i = -rt1i else ! ==== realshifts (use only one of them,KIND=dp) ==== rt1r = tr + rtdisc rt2r = tr - rtdisc if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then rt1r = rt1r*s rt2r = rt1r else rt2r = rt2r*s rt1r = rt2r end if rt1i = zero rt2i = zero end if end if ! look for two consecutive small subdiagonal elements. do m = i - 2, l, -1 ! determine the effect of starting the double-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. (the following uses scaling to avoid ! overflows and most underflows.) h21s = h( m+1, m ) s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) h21s = h( m+1, m ) / s v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & rt1i*( rt2i / s ) v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) v( 3_${ik}$ ) = h21s*h( m+2, m+1 ) s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) ) v( 1_${ik}$ ) = v( 1_${ik}$ ) / s v( 2_${ik}$ ) = v( 2_${ik}$ ) / s v( 3_${ik}$ ) = v( 3_${ik}$ ) / s if( m==l )go to 60 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( & h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 end do 60 continue ! double-shift qr step loop_130: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge one step toward the bottom of the active ! submatrix. nr is the order of g. nr = min( 3_${ik}$, i-k+1 ) if( k>m )call stdlib${ii}$_dcopy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_dlarfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = zero if( k<i-1 )h( k+2, k-1 ) = zero else if( m>l ) then ! ==== use the following instead of ! . h( k, k-1 ) = -h( k, k-1 ) to ! . avoid a bug when v(2) and v(3) ! . underflow. ==== h( k, k-1 ) = h( k, k-1 )*( one-t1 ) end if v2 = v( 2_${ik}$ ) t2 = t1*v2 if( nr==3_${ik}$ ) then v3 = v( 3_${ik}$ ) t3 = t1*v3 ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 h( k+2, j ) = h( k+2, j ) - sum*t3 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, min( k+3, i ) sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 h( j, k+2 ) = h( j, k+2 ) - sum*t3 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 z( j, k+2 ) = z( j, k+2 ) - sum*t3 end do end if else if( nr==2_${ik}$ ) then ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, i sum = h( j, k ) + v2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 end do end if end if end do loop_130 end do loop_140 ! failure to converge in remaining number of iterations info = i return 150 continue if( l==i ) then ! h(i,i-1) is negligible: one eigenvalue has converged. wr( i ) = h( i, i ) wi( i ) = zero else if( l==i-1 ) then ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. ! transform the 2-by-2 submatrix to standard schur form, ! and compute and store the eigenvalues. call stdlib${ii}$_dlanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & wi( i-1 ), wr( i ), wi( i ),cs, sn ) if( wantt ) then ! apply the transformation to the rest of h. if( i2>i )call stdlib${ii}$_drot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) call stdlib${ii}$_drot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn ) end if if( wantz ) then ! apply the transformation to z. call stdlib${ii}$_drot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn ) end if end if ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 20 160 continue return end subroutine stdlib${ii}$_dlahqr #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, & !! DLAHQR: is an auxiliary routine called by DHSEQR to update the !! eigenvalues and Schur decomposition already computed by DHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*) real(${rk}$), intent(out) :: wi(*), wr(*) ! ========================================================= ! Parameters real(${rk}$), parameter :: dat1 = 3.0_${rk}$/4.0_${rk}$ real(${rk}$), parameter :: dat2 = -0.4375_${rk}$ integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars real(${rk}$) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, & rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3 integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl ! Local Arrays real(${rk}$) :: v(3_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then wr( ilo ) = h( ilo, ilo ) wi( ilo ) = zero return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = zero h( j+3, j ) = zero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=${rk}$) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1 or 2. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 20 continue l = ilo if( i<ilo )go to 160 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 or 2 splits off at the bottom because a ! subdiagonal element has become negligible. loop_140: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( abs( h( k, k-1 ) )<=smlnum )go to 40 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) ) if( tst==zero ) then if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) ) if( k+1<=ihi )tst = tst + abs( h( k+1, k ) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some cases. ==== if( abs( h( k, k-1 ) )<=ulp*tst ) then ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) ) aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40 end if end do 40 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = zero end if ! exit from loop if a submatrix of order 1 or 2 has split off. if( l>=i-1 )go to 150 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) h11 = dat1*s + h( i, i ) h12 = dat2*s h21 = s h22 = h11 else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) ) h11 = dat1*s + h( l, l ) h12 = dat2*s h21 = s h22 = h11 else ! prepare to use francis' double shift ! (i.e. 2nd degree generalized rayleigh quotient) h11 = h( i-1, i-1 ) h21 = h( i, i-1 ) h12 = h( i-1, i ) h22 = h( i, i ) end if s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 ) if( s==zero ) then rt1r = zero rt1i = zero rt2r = zero rt2i = zero else h11 = h11 / s h21 = h21 / s h12 = h12 / s h22 = h22 / s tr = ( h11+h22 ) / two det = ( h11-tr )*( h22-tr ) - h12*h21 rtdisc = sqrt( abs( det ) ) if( det>=zero ) then ! ==== complex conjugate shifts ==== rt1r = tr*s rt2r = rt1r rt1i = rtdisc*s rt2i = -rt1i else ! ==== realshifts (use only one of them,KIND=${rk}$) ==== rt1r = tr + rtdisc rt2r = tr - rtdisc if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then rt1r = rt1r*s rt2r = rt1r else rt2r = rt2r*s rt1r = rt2r end if rt1i = zero rt2i = zero end if end if ! look for two consecutive small subdiagonal elements. do m = i - 2, l, -1 ! determine the effect of starting the double-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. (the following uses scaling to avoid ! overflows and most underflows.) h21s = h( m+1, m ) s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s ) h21s = h( m+1, m ) / s v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - & rt1i*( rt2i / s ) v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r ) v( 3_${ik}$ ) = h21s*h( m+2, m+1 ) s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) ) v( 1_${ik}$ ) = v( 1_${ik}$ ) / s v( 2_${ik}$ ) = v( 2_${ik}$ ) / s v( 3_${ik}$ ) = v( 3_${ik}$ ) / s if( m==l )go to 60 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( & h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60 end do 60 continue ! double-shift qr step loop_130: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge one step toward the bottom of the active ! submatrix. nr is the order of g. nr = min( 3_${ik}$, i-k+1 ) if( k>m )call stdlib${ii}$_${ri}$copy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_${ri}$larfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = zero if( k<i-1 )h( k+2, k-1 ) = zero else if( m>l ) then ! ==== use the following instead of ! . h( k, k-1 ) = -h( k, k-1 ) to ! . avoid a bug when v(2) and v(3) ! . underflow. ==== h( k, k-1 ) = h( k, k-1 )*( one-t1 ) end if v2 = v( 2_${ik}$ ) t2 = t1*v2 if( nr==3_${ik}$ ) then v3 = v( 3_${ik}$ ) t3 = t1*v3 ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 h( k+2, j ) = h( k+2, j ) - sum*t3 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, min( k+3, i ) sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 h( j, k+2 ) = h( j, k+2 ) - sum*t3 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 z( j, k+2 ) = z( j, k+2 ) - sum*t3 end do end if else if( nr==2_${ik}$ ) then ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = h( k, j ) + v2*h( k+1, j ) h( k, j ) = h( k, j ) - sum*t1 h( k+1, j ) = h( k+1, j ) - sum*t2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+3,i). do j = i1, i sum = h( j, k ) + v2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum*t1 h( j, k+1 ) = h( j, k+1 ) - sum*t2 end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = z( j, k ) + v2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum*t1 z( j, k+1 ) = z( j, k+1 ) - sum*t2 end do end if end if end do loop_130 end do loop_140 ! failure to converge in remaining number of iterations info = i return 150 continue if( l==i ) then ! h(i,i-1) is negligible: one eigenvalue has converged. wr( i ) = h( i, i ) wi( i ) = zero else if( l==i-1 ) then ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged. ! transform the 2-by-2 submatrix to standard schur form, ! and compute and store the eigenvalues. call stdlib${ii}$_${ri}$lanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), & wi( i-1 ), wr( i ), wi( i ),cs, sn ) if( wantt ) then ! apply the transformation to the rest of h. if( i2>i )call stdlib${ii}$_${ri}$rot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn ) call stdlib${ii}$_${ri}$rot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn ) end if if( wantz ) then ! apply the transformation to z. call stdlib${ii}$_${ri}$rot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn ) end if end if ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 20 160 continue return end subroutine stdlib${ii}$_${ri}$lahqr #:endif #:endfor pure module subroutine stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & !! CLAHQR is an auxiliary routine called by CHSEQR to update the !! eigenvalues and Schur decomposition already computed by CHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) complex(sp), intent(out) :: w(*) ! ========================================================= ! Parameters real(sp), parameter :: dat1 = 3.0_sp/4.0_sp integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars complex(sp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y real(sp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & ulp integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl ! Local Arrays complex(sp) :: v(2_${ik}$) ! Statement Functions real(sp) :: cabs1 ! Intrinsic Functions ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then w( ilo ) = h( ilo, ilo ) return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = czero h( j+3, j ) = czero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero ! ==== ensure that subdiagonal entries are real ==== if( wantt ) then jlo = 1_${ik}$ jhi = n else jlo = ilo jhi = ihi end if do i = ilo + 1, ihi if( aimag( h( i, i-1 ) )/=zero ) then ! ==== the following redundant normalization ! . avoids problems with both gradual and ! . sudden underflow in abs(h(i,i-1)) ==== sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) sc = conjg( sc ) / abs( sc ) h( i, i-1 ) = abs( h( i, i-1 ) ) call stdlib${ii}$_cscal( jhi-i+1, sc, h( i, i ), ldh ) call stdlib${ii}$_cscal( min( jhi, i+1 )-jlo+1, conjg( sc ), h( jlo, i ),1_${ik}$ ) if( wantz )call stdlib${ii}$_cscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ ) end if end do nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=sp) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo, or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 30 continue if( i<ilo )go to 150 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 splits off at the bottom because a ! subdiagonal element has become negligible. l = ilo loop_130: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) ) if( tst==czero ) then if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=sp) ) if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=sp) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some examples. ==== if( abs( real( h( k, k-1 ),KIND=sp) )<=ulp*tst ) then ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 end if end do 50 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = czero end if ! exit from loop if a submatrix of order 1 has split off. if( l>=i )go to 140 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( i, i-1 ),KIND=sp) ) t = s + h( i, i ) else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( l+1, l ),KIND=sp) ) t = s + h( l, l ) else ! wilkinson's shift. t = h( i, i ) u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) s = cabs1( u ) if( s/=zero ) then x = half*( h( i-1, i-1 )-t ) sx = cabs1( x ) s = max( s, cabs1( x ) ) y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ ) if( sx>zero ) then if( real( x / sx,KIND=sp)*real( y,KIND=sp)+aimag( x / sx )*aimag( y )& <zero )y = -y end if t = t - u*stdlib${ii}$_cladiv( u, ( x+y ) ) end if end if ! look for two consecutive small subdiagonal elements. do m = i - 1, l + 1, -1 ! determine the effect of starting the single-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. h11 = h( m, m ) h22 = h( m+1, m+1 ) h11s = h11 - t h21 = real( h( m+1, m ),KIND=sp) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 h10 = real( h( m, m-1 ),KIND=sp) if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )& go to 70 end do h11 = h( l, l ) h22 = h( l+1, l+1 ) h11s = h11 - t h21 = real( h( l+1, l ),KIND=sp) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 70 continue ! single-shift qr step loop_120: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge cone step toward the bottom of the active ! submatrix. ! v(2) is always real before the call to stdlib${ii}$_clarfg, and hence ! after the call t2 ( = t1*v(2) ) is also real. if( k>m )call stdlib${ii}$_ccopy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_clarfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = czero end if v2 = v( 2_${ik}$ ) t2 = real( t1*v2,KIND=sp) ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) h( k, j ) = h( k, j ) - sum h( k+1, j ) = h( k+1, j ) - sum*v2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+2,i). do j = i1, min( k+2, i ) sum = t1*h( j, k ) + t2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = t1*z( j, k ) + t2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) end do end if if( k==m .and. m>l ) then ! if the qr step was started at row m > l because two ! consecutive small subdiagonals were found, then extra ! scaling must be performed to ensure that h(m,m-1) remains ! real. temp = cone - t1 temp = temp / abs( temp ) h( m+1, m ) = h( m+1, m )*conjg( temp ) if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp do j = m, i if( j/=m+1 ) then if( i2>j )call stdlib${ii}$_cscal( i2-j, temp, h( j, j+1 ), ldh ) call stdlib${ii}$_cscal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_cscal( nz, conjg( temp ), z( iloz, j ), 1_${ik}$ ) end if end if end do end if end do loop_120 ! ensure that h(i,i-1) is real. temp = h( i, i-1 ) if( aimag( temp )/=zero ) then rtemp = abs( temp ) h( i, i-1 ) = rtemp temp = temp / rtemp if( i2>i )call stdlib${ii}$_cscal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) call stdlib${ii}$_cscal( i-i1, temp, h( i1, i ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_cscal( nz, temp, z( iloz, i ), 1_${ik}$ ) end if end if end do loop_130 ! failure to converge in remaining number of iterations info = i return 140 continue ! h(i,i-1) is negligible: cone eigenvalue has converged. w( i ) = h( i, i ) ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 30 150 continue return end subroutine stdlib${ii}$_clahqr pure module subroutine stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & !! ZLAHQR is an auxiliary routine called by CHSEQR to update the !! eigenvalues and Schur decomposition already computed by CHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) complex(dp), intent(out) :: w(*) ! ========================================================= ! Parameters real(dp), parameter :: dat1 = 3.0_dp/4.0_dp integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars complex(dp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y real(dp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & ulp integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl ! Local Arrays complex(dp) :: v(2_${ik}$) ! Statement Functions real(dp) :: cabs1 ! Intrinsic Functions ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then w( ilo ) = h( ilo, ilo ) return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = czero h( j+3, j ) = czero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero ! ==== ensure that subdiagonal entries are real ==== if( wantt ) then jlo = 1_${ik}$ jhi = n else jlo = ilo jhi = ihi end if do i = ilo + 1, ihi if( aimag( h( i, i-1 ) )/=zero ) then ! ==== the following redundant normalization ! . avoids problems with both gradual and ! . sudden underflow in abs(h(i,i-1)) ==== sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) sc = conjg( sc ) / abs( sc ) h( i, i-1 ) = abs( h( i, i-1 ) ) call stdlib${ii}$_zscal( jhi-i+1, sc, h( i, i ), ldh ) call stdlib${ii}$_zscal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1_${ik}$ ) if( wantz )call stdlib${ii}$_zscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ ) end if end do nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=dp) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo, or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 30 continue if( i<ilo )go to 150 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 splits off at the bottom because a ! subdiagonal element has become negligible. l = ilo loop_130: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) ) if( tst==czero ) then if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=dp) ) if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=dp) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some examples. ==== if( abs( real( h( k, k-1 ),KIND=dp) )<=ulp*tst ) then ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 end if end do 50 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = czero end if ! exit from loop if a submatrix of order 1 has split off. if( l>=i )go to 140 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( i, i-1 ),KIND=dp) ) t = s + h( i, i ) else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( l+1, l ),KIND=dp) ) t = s + h( l, l ) else ! wilkinson's shift. t = h( i, i ) u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) s = cabs1( u ) if( s/=zero ) then x = half*( h( i-1, i-1 )-t ) sx = cabs1( x ) s = max( s, cabs1( x ) ) y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ ) if( sx>zero ) then if( real( x / sx,KIND=dp)*real( y,KIND=dp)+aimag( x / sx )*aimag( y )& <zero )y = -y end if t = t - u*stdlib${ii}$_zladiv( u, ( x+y ) ) end if end if ! look for two consecutive small subdiagonal elements. do m = i - 1, l + 1, -1 ! determine the effect of starting the single-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. h11 = h( m, m ) h22 = h( m+1, m+1 ) h11s = h11 - t h21 = real( h( m+1, m ),KIND=dp) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 h10 = real( h( m, m-1 ),KIND=dp) if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )& go to 70 end do h11 = h( l, l ) h22 = h( l+1, l+1 ) h11s = h11 - t h21 = real( h( l+1, l ),KIND=dp) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 70 continue ! single-shift qr step loop_120: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge cone step toward the bottom of the active ! submatrix. ! v(2) is always real before the call to stdlib${ii}$_zlarfg, and hence ! after the call t2 ( = t1*v(2) ) is also real. if( k>m )call stdlib${ii}$_zcopy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_zlarfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = czero end if v2 = v( 2_${ik}$ ) t2 = real( t1*v2,KIND=dp) ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) h( k, j ) = h( k, j ) - sum h( k+1, j ) = h( k+1, j ) - sum*v2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+2,i). do j = i1, min( k+2, i ) sum = t1*h( j, k ) + t2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = t1*z( j, k ) + t2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) end do end if if( k==m .and. m>l ) then ! if the qr step was started at row m > l because two ! consecutive small subdiagonals were found, then extra ! scaling must be performed to ensure that h(m,m-1) remains ! real. temp = cone - t1 temp = temp / abs( temp ) h( m+1, m ) = h( m+1, m )*conjg( temp ) if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp do j = m, i if( j/=m+1 ) then if( i2>j )call stdlib${ii}$_zscal( i2-j, temp, h( j, j+1 ), ldh ) call stdlib${ii}$_zscal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_zscal( nz, conjg( temp ), z( iloz, j ),1_${ik}$ ) end if end if end do end if end do loop_120 ! ensure that h(i,i-1) is real. temp = h( i, i-1 ) if( aimag( temp )/=zero ) then rtemp = abs( temp ) h( i, i-1 ) = rtemp temp = temp / rtemp if( i2>i )call stdlib${ii}$_zscal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) call stdlib${ii}$_zscal( i-i1, temp, h( i1, i ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_zscal( nz, temp, z( iloz, i ), 1_${ik}$ ) end if end if end do loop_130 ! failure to converge in remaining number of iterations info = i return 140 continue ! h(i,i-1) is negligible: cone eigenvalue has converged. w( i ) = h( i, i ) ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 30 150 continue return end subroutine stdlib${ii}$_zlahqr #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info & !! ZLAHQR: is an auxiliary routine called by CHSEQR to update the !! eigenvalues and Schur decomposition already computed by CHSEQR, by !! dealing with the Hessenberg submatrix in rows and columns ILO to !! IHI. ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*) complex(${ck}$), intent(out) :: w(*) ! ========================================================= ! Parameters real(${ck}$), parameter :: dat1 = 3.0_${ck}$/4.0_${ck}$ integer(${ik}$), parameter :: kexsh = 10_${ik}$ ! Local Scalars complex(${ck}$) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y real(${ck}$) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, & ulp integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl ! Local Arrays complex(${ck}$) :: v(2_${ik}$) ! Statement Functions real(${ck}$) :: cabs1 ! Intrinsic Functions ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! quick return if possible if( n==0 )return if( ilo==ihi ) then w( ilo ) = h( ilo, ilo ) return end if ! ==== clear out the trash ==== do j = ilo, ihi - 3 h( j+2, j ) = czero h( j+3, j ) = czero end do if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero ! ==== ensure that subdiagonal entries are real ==== if( wantt ) then jlo = 1_${ik}$ jhi = n else jlo = ilo jhi = ihi end if do i = ilo + 1, ihi if( aimag( h( i, i-1 ) )/=zero ) then ! ==== the following redundant normalization ! . avoids problems with both gradual and ! . sudden underflow in abs(h(i,i-1)) ==== sc = h( i, i-1 ) / cabs1( h( i, i-1 ) ) sc = conjg( sc ) / abs( sc ) h( i, i-1 ) = abs( h( i, i-1 ) ) call stdlib${ii}$_${ci}$scal( jhi-i+1, sc, h( i, i ), ldh ) call stdlib${ii}$_${ci}$scal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1_${ik}$ ) if( wantz )call stdlib${ii}$_${ci}$scal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ ) end if end do nh = ihi - ilo + 1_${ik}$ nz = ihiz - iloz + 1_${ik}$ ! set machine-dependent constants for the stopping criterion. safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( nh,KIND=${ck}$) / ulp ) ! i1 and i2 are the indices of the first row and last column of h ! to which transformations must be applied. if eigenvalues only are ! being computed, i1 and i2 are set inside the main loop. if( wantt ) then i1 = 1_${ik}$ i2 = n end if ! itmax is the total number of qr iterations allowed. itmax = 30_${ik}$ * max( 10_${ik}$, nh ) ! kdefl counts the number of iterations since a deflation kdefl = 0_${ik}$ ! the main loop begins here. i is the loop index and decreases from ! ihi to ilo in steps of 1. each iteration of the loop works ! with the active submatrix in rows and columns l to i. ! eigenvalues i+1 to ihi have already converged. either l = ilo, or ! h(l,l-1) is negligible so that the matrix splits. i = ihi 30 continue if( i<ilo )go to 150 ! perform qr iterations on rows and columns ilo to i until a ! submatrix of order 1 splits off at the bottom because a ! subdiagonal element has become negligible. l = ilo loop_130: do its = 0, itmax ! look for a single small subdiagonal element. do k = i, l + 1, -1 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) ) if( tst==czero ) then if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=${ck}$) ) if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=${ck}$) ) end if ! ==== the following is a conservative small subdiagonal ! . deflation criterion due to ahues ! . 1997). it has better mathematical foundation and ! . improves accuracy in some examples. ==== if( abs( real( h( k, k-1 ),KIND=${ck}$) )<=ulp*tst ) then ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) ) aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) ) s = aa + ab if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50 end if end do 50 continue l = k if( l>ilo ) then ! h(l,l-1) is negligible h( l, l-1 ) = czero end if ! exit from loop if a submatrix of order 1 has split off. if( l>=i )go to 140 kdefl = kdefl + 1_${ik}$ ! now the active submatrix is in rows and columns l to i. if ! eigenvalues only are being computed, only the active submatrix ! need be transformed. if( .not.wantt ) then i1 = l i2 = i end if if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( i, i-1 ),KIND=${ck}$) ) t = s + h( i, i ) else if( mod(kdefl,kexsh)==0_${ik}$ ) then ! exceptional shift. s = dat1*abs( real( h( l+1, l ),KIND=${ck}$) ) t = s + h( l, l ) else ! wilkinson's shift. t = h( i, i ) u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) ) s = cabs1( u ) if( s/=zero ) then x = half*( h( i-1, i-1 )-t ) sx = cabs1( x ) s = max( s, cabs1( x ) ) y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ ) if( sx>zero ) then if( real( x / sx,KIND=${ck}$)*real( y,KIND=${ck}$)+aimag( x / sx )*aimag( y )& <zero )y = -y end if t = t - u*stdlib${ii}$_${ci}$ladiv( u, ( x+y ) ) end if end if ! look for two consecutive small subdiagonal elements. do m = i - 1, l + 1, -1 ! determine the effect of starting the single-shift qr ! iteration at row m, and see if this would make h(m,m-1) ! negligible. h11 = h( m, m ) h22 = h( m+1, m+1 ) h11s = h11 - t h21 = real( h( m+1, m ),KIND=${ck}$) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 h10 = real( h( m, m-1 ),KIND=${ck}$) if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )& go to 70 end do h11 = h( l, l ) h22 = h( l+1, l+1 ) h11s = h11 - t h21 = real( h( l+1, l ),KIND=${ck}$) s = cabs1( h11s ) + abs( h21 ) h11s = h11s / s h21 = h21 / s v( 1_${ik}$ ) = h11s v( 2_${ik}$ ) = h21 70 continue ! single-shift qr step loop_120: do k = m, i - 1 ! the first iteration of this loop determines a reflection g ! from the vector v and applies it from left and right to h, ! thus creating a nonzero bulge below the subdiagonal. ! each subsequent iteration determines a reflection g to ! restore the hessenberg form in the (k-1)th column, and thus ! chases the bulge cone step toward the bottom of the active ! submatrix. ! v(2) is always real before the call to stdlib${ii}$_${ci}$larfg, and hence ! after the call t2 ( = t1*v(2) ) is also real. if( k>m )call stdlib${ii}$_${ci}$copy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ ) call stdlib${ii}$_${ci}$larfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 ) if( k>m ) then h( k, k-1 ) = v( 1_${ik}$ ) h( k+1, k-1 ) = czero end if v2 = v( 2_${ik}$ ) t2 = real( t1*v2,KIND=${ck}$) ! apply g from the left to transform the rows of the matrix ! in columns k to i2. do j = k, i2 sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j ) h( k, j ) = h( k, j ) - sum h( k+1, j ) = h( k+1, j ) - sum*v2 end do ! apply g from the right to transform the columns of the ! matrix in rows i1 to min(k+2,i). do j = i1, min( k+2, i ) sum = t1*h( j, k ) + t2*h( j, k+1 ) h( j, k ) = h( j, k ) - sum h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 ) end do if( wantz ) then ! accumulate transformations in the matrix z do j = iloz, ihiz sum = t1*z( j, k ) + t2*z( j, k+1 ) z( j, k ) = z( j, k ) - sum z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 ) end do end if if( k==m .and. m>l ) then ! if the qr step was started at row m > l because two ! consecutive small subdiagonals were found, then extra ! scaling must be performed to ensure that h(m,m-1) remains ! real. temp = cone - t1 temp = temp / abs( temp ) h( m+1, m ) = h( m+1, m )*conjg( temp ) if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp do j = m, i if( j/=m+1 ) then if( i2>j )call stdlib${ii}$_${ci}$scal( i2-j, temp, h( j, j+1 ), ldh ) call stdlib${ii}$_${ci}$scal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_${ci}$scal( nz, conjg( temp ), z( iloz, j ),1_${ik}$ ) end if end if end do end if end do loop_120 ! ensure that h(i,i-1) is real. temp = h( i, i-1 ) if( aimag( temp )/=zero ) then rtemp = abs( temp ) h( i, i-1 ) = rtemp temp = temp / rtemp if( i2>i )call stdlib${ii}$_${ci}$scal( i2-i, conjg( temp ), h( i, i+1 ), ldh ) call stdlib${ii}$_${ci}$scal( i-i1, temp, h( i1, i ), 1_${ik}$ ) if( wantz ) then call stdlib${ii}$_${ci}$scal( nz, temp, z( iloz, i ), 1_${ik}$ ) end if end if end do loop_130 ! failure to converge in remaining number of iterations info = i return 140 continue ! h(i,i-1) is negligible: cone eigenvalue has converged. w( i ) = h( i, i ) ! reset deflation counter kdefl = 0_${ik}$ ! return to start of the main loop with new value of i. i = l - 1_${ik}$ go to 30 150 continue return end subroutine stdlib${ii}$_${ci}$lahqr #:endif #:endfor module subroutine stdlib${ii}$_slaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! SLAQR0 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), z(ldz,*) real(sp), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(sp), parameter :: wilk1 = 0.75_sp real(sp), parameter :: wilk2 = -0.4375_sp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_slahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(sp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_slahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_slaqr3 ==== call stdlib${ii}$_slaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_slaqr5, stdlib${ii}$_slaqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=sp) return end if ! ==== stdlib${ii}$_slahqr/stdlib${ii}$_slaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_slaqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_slaqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_slaqr4 or ! . stdlib${ii}$_slahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_slaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf ) else call stdlib${ii}$_slahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=sp) end subroutine stdlib${ii}$_slaqr0 module subroutine stdlib${ii}$_dlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! DLAQR0 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), z(ldz,*) real(dp), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(dp), parameter :: wilk1 = 0.75_dp real(dp), parameter :: wilk2 = -0.4375_dp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_dlahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(dp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_dlahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_dlaqr3 ==== call stdlib${ii}$_dlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_dlaqr5, stdlib${ii}$_dlaqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=dp) return end if ! ==== stdlib${ii}$_dlahqr/stdlib${ii}$_dlaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_dlaqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_dlaqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_dlaqr4 or ! . stdlib${ii}$_dlahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_dlaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf ) else call stdlib${ii}$_dlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=dp) end subroutine stdlib${ii}$_dlaqr0 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$laqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! DLAQR0: computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*) real(${rk}$), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(${rk}$), parameter :: wilk1 = 0.75_${rk}$ real(${rk}$), parameter :: wilk2 = -0.4375_${rk}$ ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_${ri}$lahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(${rk}$) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(${rk}$) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_${ri}$lahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_${ri}$laqr3 ==== call stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_${ri}$laqr5, stdlib${ii}$_${ri}$laqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) return end if ! ==== stdlib${ii}$_${ri}$lahqr/stdlib${ii}$_${ri}$laqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_${ri}$laqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_${ri}$laqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_${ri}$laqr4 or ! . stdlib${ii}$_${ri}$lahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_${ri}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_${ri}$laqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf ) else call stdlib${ii}$_${ri}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( & ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) end subroutine stdlib${ii}$_${ri}$laqr0 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! CLAQR0 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) complex(sp), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(sp), parameter :: wilk1 = 0.75_sp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_clahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(sp) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(sp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_clahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_claqr3 ==== call stdlib${ii}$_claqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_claqr5, stdlib${ii}$_claqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) return end if ! ==== stdlib${ii}$_clahqr/stdlib${ii}$_claqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_claqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_claqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_claqr4 or ! . stdlib${ii}$_clahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_claqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf ) else call stdlib${ii}$_clahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) end subroutine stdlib${ii}$_claqr0 pure module subroutine stdlib${ii}$_zlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! ZLAQR0 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) complex(dp), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(dp), parameter :: wilk1 = 0.75_dp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_zlahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(dp) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(dp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_zlahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_zlaqr3 ==== call stdlib${ii}$_zlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_zlaqr5, stdlib${ii}$_zlaqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) return end if ! ==== stdlib${ii}$_zlahqr/stdlib${ii}$_zlaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_zlaqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_zlaqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_zlaqr4 or ! . stdlib${ii}$_zlahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_zlaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf ) else call stdlib${ii}$_zlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) end subroutine stdlib${ii}$_zlaqr0 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! ZLAQR0: computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*) complex(${ck}$), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(${ck}$), parameter :: wilk1 = 0.75_${ck}$ ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_${ci}$lahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(${ck}$) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(${ck}$) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(${ck}$) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_${ci}$lahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_${ci}$laqr3 ==== call stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_${ci}$laqr5, stdlib${ii}$_${ci}$laqr3) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) return end if ! ==== stdlib${ii}$_${ci}$lahqr/stdlib${ii}$_${ci}$laqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_${ci}$laqr3 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_${ci}$laqr3 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_${ci}$laqr4 or ! . stdlib${ii}$_${ci}$lahqr on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_${ci}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) if( ns>nmin ) then call stdlib${ii}$_${ci}$laqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf ) else call stdlib${ii}$_${ci}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( & ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf ) end if ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) end subroutine stdlib${ii}$_${ci}$laqr0 #:endif #:endfor pure module subroutine stdlib${ii}$_slaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, SLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) !! scaling to avoid overflows and most underflows. It !! is assumed that either !! 1) sr1 = sr2 and si1 = -si2 !! or !! 2) si1 = si2 = 0. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(sp), intent(in) :: si1, si2, sr1, sr2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments real(sp), intent(in) :: h(ldh,*) real(sp), intent(out) :: v(*) ! ================================================================ ! Local Scalars real(sp) :: h21s, h31s, s ! Intrinsic Functions ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( & si2 / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) end if else s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )& *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_slaqr1 pure module subroutine stdlib${ii}$_dlaqr1( n, h, ldh, sr1, si1, sr2, si2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) !! scaling to avoid overflows and most underflows. It !! is assumed that either !! 1) sr1 = sr2 and si1 = -si2 !! or !! 2) si1 = si2 = 0. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(dp), intent(in) :: si1, si2, sr1, sr2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments real(dp), intent(in) :: h(ldh,*) real(dp), intent(out) :: v(*) ! ================================================================ ! Local Scalars real(dp) :: h21s, h31s, s ! Intrinsic Functions ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( & si2 / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) end if else s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )& *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_dlaqr1 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$laqr1( n, h, ldh, sr1, si1, sr2, si2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) !! scaling to avoid overflows and most underflows. It !! is assumed that either !! 1) sr1 = sr2 and si1 = -si2 !! or !! 2) si1 = si2 = 0. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments real(${rk}$), intent(in) :: si1, si2, sr1, sr2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments real(${rk}$), intent(in) :: h(ldh,*) real(${rk}$), intent(out) :: v(*) ! ================================================================ ! Local Scalars real(${rk}$) :: h21s, h31s, s ! Intrinsic Functions ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( & si2 / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) end if else s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )& *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_${ri}$laqr1 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr1( n, h, ldh, s1, s2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, CLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - s1*I)*(H - s2*I) !! scaling to avoid overflows and most underflows. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments complex(sp), intent(in) :: s1, s2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments complex(sp), intent(in) :: h(ldh,*) complex(sp), intent(out) :: v(*) ! ================================================================ ! Parameters ! Local Scalars complex(sp) :: cdum, h21s, h31s real(sp) :: s ! Intrinsic Functions ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) end if else s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) ) if( s==czero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero v( 3_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )& *h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_claqr1 pure module subroutine stdlib${ii}$_zlaqr1( n, h, ldh, s1, s2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - s1*I)*(H - s2*I) !! scaling to avoid overflows and most underflows. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments complex(dp), intent(in) :: s1, s2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments complex(dp), intent(in) :: h(ldh,*) complex(dp), intent(out) :: v(*) ! ================================================================ ! Parameters ! Local Scalars complex(dp) :: cdum, h21s, h31s real(dp) :: s ! Intrinsic Functions ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) end if else s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) ) if( s==czero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero v( 3_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )& *h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_zlaqr1 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr1( n, h, ldh, s1, s2, v ) !! Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (H - s1*I)*(H - s2*I) !! scaling to avoid overflows and most underflows. !! This is useful for starting double implicit shift bulges !! in the QR algorithm. ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments complex(${ck}$), intent(in) :: s1, s2 integer(${ik}$), intent(in) :: ldh, n ! Array Arguments complex(${ck}$), intent(in) :: h(ldh,*) complex(${ck}$), intent(out) :: v(*) ! ================================================================ ! Parameters ! Local Scalars complex(${ck}$) :: cdum, h21s, h31s real(${ck}$) :: s ! Intrinsic Functions ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements ! quick return if possible if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then return end if if( n==2_${ik}$ ) then s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) if( s==zero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) end if else s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) ) if( s==czero ) then v( 1_${ik}$ ) = czero v( 2_${ik}$ ) = czero v( 3_${ik}$ ) = czero else h21s = h( 2_${ik}$, 1_${ik}$ ) / s h31s = h( 3_${ik}$, 1_${ik}$ ) / s v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )& *h31s v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ ) end if end if end subroutine stdlib${ii}$_${ci}$laqr1 #:endif #:endfor module subroutine stdlib${ii}$_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! SLAQR2 is identical to SLAQR3 except that it avoids !! recursion by calling SLAHQR instead of SLAQR4. !! Aggressive early deflation: !! This subroutine accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), z(ldz,*) real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwkopt logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_sgehrd ==== call stdlib${ii}$_sgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_sormhr ==== call stdlib${ii}$_sormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=sp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_slaset( 'A', jw, jw, zero, one, v, ldv ) call stdlib${ii}$_slahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, & v, ldv, infqr ) ! ==== stdlib${ii}$_strexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not.bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_strexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_strexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_scopy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_slarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_slaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_slarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_slarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_slarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_sgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_scopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_sormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=sp) end subroutine stdlib${ii}$_slaqr2 module subroutine stdlib${ii}$_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! DLAQR2 is identical to DLAQR3 except that it avoids !! recursion by calling DLAHQR instead of DLAQR4. !! Aggressive early deflation: !! This subroutine accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), z(ldz,*) real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwkopt logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_dgehrd ==== call stdlib${ii}$_dgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_dormhr ==== call stdlib${ii}$_dormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=dp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_dlaset( 'A', jw, jw, zero, one, v, ldv ) call stdlib${ii}$_dlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, & v, ldv, infqr ) ! ==== stdlib${ii}$_dtrexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not.bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_dtrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_dtrexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_dcopy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_dlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_dlarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_dlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_dlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_dgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_dcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_dormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=dp) end subroutine stdlib${ii}$_dlaqr2 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! DLAQR2: is identical to DLAQR3 except that it avoids !! recursion by calling DLAHQR instead of DLAQR4. !! Aggressive early deflation: !! This subroutine accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*) real(${rk}$), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(${rk}$) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwkopt logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_${ri}$gehrd ==== call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ri}$ormhr ==== call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${rk}$) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_${ri}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_${ri}$laset( 'A', jw, jw, zero, one, v, ldv ) call stdlib${ii}$_${ri}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, & v, ldv, infqr ) ! ==== stdlib${ii}$_${ri}$trexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not.bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_${ri}$trexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_${ri}$trexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_${ri}$copy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_${ri}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_${ri}$laset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_${ri}$larf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ri}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ri}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_${ri}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_${ri}$gemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_${ri}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) end subroutine stdlib${ii}$_${ri}$laqr2 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! CLAQR2 is identical to CLAQR3 except that it avoids !! recursion by calling CLAHQR instead of CLAQR4. !! Aggressive early deflation: !! This subroutine accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(sp) :: beta, cdum, s, tau real(sp) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwkopt ! Intrinsic Functions ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_cgehrd ==== call stdlib${ii}$_cgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_cunmhr ==== call stdlib${ii}$_cunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_claset( 'A', jw, jw, czero, cone, v, ldv ) call stdlib${ii}$_clahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_ctrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_ccopy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_clarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_claset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_clarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_clarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_clarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_cgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_ccopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_cunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) end subroutine stdlib${ii}$_claqr2 pure module subroutine stdlib${ii}$_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! ZLAQR2 is identical to ZLAQR3 except that it avoids !! recursion by calling ZLAHQR instead of ZLAQR4. !! Aggressive early deflation: !! ZLAQR2 accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(dp) :: beta, cdum, s, tau real(dp) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwkopt ! Intrinsic Functions ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_zgehrd ==== call stdlib${ii}$_zgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_zunmhr ==== call stdlib${ii}$_zunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_zlaset( 'A', jw, jw, czero, cone, v, ldv ) call stdlib${ii}$_zlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_ztrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_zcopy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_zlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_zlarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_zlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_zlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_zgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_zcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_zunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) end subroutine stdlib${ii}$_zlaqr2 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! ZLAQR2: is identical to ZLAQR3 except that it avoids !! recursion by calling ZLAHQR instead of ZLAQR4. !! Aggressive early deflation: !! ZLAQR2 accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*) complex(${ck}$), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(${ck}$) :: beta, cdum, s, tau real(${ck}$) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwkopt ! Intrinsic Functions ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_${ci}$gehrd ==== call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ci}$unmhr ==== call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = jw + max( lwk1, lwk2 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${ck}$) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_${ci}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_${ci}$laset( 'A', jw, jw, czero, cone, v, ldv ) call stdlib${ii}$_${ci}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_${ci}$trexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_${ci}$copy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_${ci}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_${ci}$laset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_${ci}$larf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ci}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ci}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_${ci}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_${ci}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) end subroutine stdlib${ii}$_${ci}$laqr2 #:endif #:endfor module subroutine stdlib${ii}$_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! Aggressive early deflation: !! SLAQR3 accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), z(ldz,*) real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwk3, lwkopt, nmin logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_sgehrd ==== call stdlib${ii}$_sgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_sormhr ==== call stdlib${ii}$_sormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_slaqr4 ==== call stdlib${ii}$_slaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -& 1_${ik}$, infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=sp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_slaset( 'A', jw, jw, zero, one, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_slaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, work, lwork, infqr ) else call stdlib${ii}$_slahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, infqr ) end if ! ==== stdlib${ii}$_strexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not. bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_strexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_strexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_scopy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_slarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_slaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_slarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_slarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_slarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_sgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_scopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_sormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=sp) end subroutine stdlib${ii}$_slaqr3 module subroutine stdlib${ii}$_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! Aggressive early deflation: !! DLAQR3 accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), z(ldz,*) real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwk3, lwkopt, nmin logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_dgehrd ==== call stdlib${ii}$_dgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_dormhr ==== call stdlib${ii}$_dormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_dlaqr4 ==== call stdlib${ii}$_dlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -& 1_${ik}$, infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=dp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_dlaset( 'A', jw, jw, zero, one, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_dlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, work, lwork, infqr ) else call stdlib${ii}$_dlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, infqr ) end if ! ==== stdlib${ii}$_dtrexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not. bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_dtrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_dtrexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_dcopy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_dlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_dlarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_dlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_dlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_dgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_dcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_dormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=dp) end subroutine stdlib${ii}$_dlaqr3 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,& !! Aggressive early deflation: !! DLAQR3: accepts as input an upper Hessenberg matrix !! H and performs an orthogonal similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an orthogonal similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*) real(${rk}$), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(${rk}$) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, & tau, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, & ltop, lwk1, lwk2, lwk3, lwkopt, nmin logical(lk) :: bulge, sorted ! Intrinsic Functions ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_${ri}$gehrd ==== call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ri}$ormhr ==== call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ri}$laqr4 ==== call stdlib${ii}$_${ri}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -& 1_${ik}$, infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = one if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${rk}$) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = zero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sr( kwtop ) = h( kwtop, kwtop ) si( kwtop ) = zero ns = 1_${ik}$ nd = 0_${ik}$ if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero end if work( 1_${ik}$ ) = one return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_${ri}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_${ri}$laset( 'A', jw, jw, zero, one, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_${ri}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, work, lwork, infqr ) else call stdlib${ii}$_${ri}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, & jw, v, ldv, infqr ) end if ! ==== stdlib${ii}$_${ri}$trexc needs a clean margin near the diagonal ==== do j = 1, jw - 3 t( j+2, j ) = zero t( j+3, j ) = zero end do if( jw>2_${ik}$ )t( jw, jw-2 ) = zero ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ 20 continue if( ilst<=ns ) then if( ns==1_${ik}$ ) then bulge = .false. else bulge = t( ns, ns-1 )/=zero end if ! ==== small spike tip test for deflation ==== if( .not. bulge ) then ! ==== real eigenvalue ==== foo = abs( t( ns, ns ) ) if( foo==zero )foo = abs( s ) if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then ! ==== deflatable ==== ns = ns - 1_${ik}$ else ! ==== undeflatable. move it up out of the way. ! . (stdlib${ii}$_${ri}$trexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 1_${ik}$ end if else ! ==== complex conjugate pair ==== foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) & ) ) if( foo==zero )foo = abs( s ) if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) & then ! ==== deflatable ==== ns = ns - 2_${ik}$ else ! ==== undeflatable. move them up out of the way. ! . fortunately, stdlib${ii}$_${ri}$trexc does the right thing with ! . ilst in case of a rare exchange failure. ==== ifst = ns call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) ilst = ilst + 2_${ik}$ end if end if ! ==== end deflation detection loop ==== go to 20 end if ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = zero if( ns<jw ) then ! ==== sorting diagonal blocks of t improves accuracy for ! . graded matrices. bubble sort deals well with ! . exchange failures. ==== sorted = .false. i = ns + 1_${ik}$ 30 continue if( sorted )go to 50 sorted = .true. kend = i - 1_${ik}$ i = infqr + 1_${ik}$ if( i==ns ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if 40 continue if( k<=kend ) then if( k==i+1 ) then evi = abs( t( i, i ) ) else evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) ) end if if( k==kend ) then evk = abs( t( k, k ) ) else if( t( k+1, k )==zero ) then evk = abs( t( k, k ) ) else evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) ) end if if( evi>=evk ) then i = k else sorted = .false. ifst = i ilst = k call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info ) if( info==0_${ik}$ ) then i = ilst else i = k end if end if if( i==kend ) then k = i + 1_${ik}$ else if( t( i+1, i )==zero ) then k = i + 1_${ik}$ else k = i + 2_${ik}$ end if go to 40 end if go to 30 50 continue end if ! ==== restore shift/eigenvalue array from t ==== i = jw 60 continue if( i>=infqr+1 ) then if( i==infqr+1 ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else if( t( i, i-1 )==zero ) then sr( kwtop+i-1 ) = t( i, i ) si( kwtop+i-1 ) = zero i = i - 1_${ik}$ else aa = t( i-1, i-1 ) cc = t( i, i-1 ) bb = t( i-1, i ) dd = t( i, i ) call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-& 1_${ik}$ ),si( kwtop+i-1 ), cs, sn ) i = i - 2_${ik}$ end if go to 60 end if if( ns<jw .or. s==zero ) then if( ns>1_${ik}$ .and. s/=zero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_${ri}$copy( ns, v, ldv, work, 1_${ik}$ ) beta = work( 1_${ik}$ ) call stdlib${ii}$_${ri}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = one call stdlib${ii}$_${ri}$laset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_${ri}$larf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ri}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ri}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ ) call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_${ri}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_${ri}$gemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, & zero, t, ldt ) call stdlib${ii}$_${ri}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, & zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) end subroutine stdlib${ii}$_${ri}$laqr3 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! Aggressive early deflation: !! CLAQR3 accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(sp) :: beta, cdum, s, tau real(sp) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwk3, lwkopt, nmin ! Intrinsic Functions ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_cgehrd ==== call stdlib${ii}$_cgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_cunmhr ==== call stdlib${ii}$_cunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_claqr4 ==== call stdlib${ii}$_claqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, & infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_claset( 'A', jw, jw, czero, cone, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_claqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & work, lwork, infqr ) else call stdlib${ii}$_clahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) end if ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_ctrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_ccopy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_clarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_claset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_clarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_clarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_clarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_cgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_ccopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_cunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) end subroutine stdlib${ii}$_claqr3 pure module subroutine stdlib${ii}$_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! Aggressive early deflation: !! ZLAQR3 accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(dp) :: beta, cdum, s, tau real(dp) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwk3, lwkopt, nmin ! Intrinsic Functions ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_zgehrd ==== call stdlib${ii}$_zgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_zunmhr ==== call stdlib${ii}$_zunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_zlaqr4 ==== call stdlib${ii}$_zlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, & infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_zlaset( 'A', jw, jw, czero, cone, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_zlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & work, lwork, infqr ) else call stdlib${ii}$_zlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) end if ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_ztrexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_zcopy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_zlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_zlarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_zlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_zlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_zgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_zcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_zunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) end subroutine stdlib${ii}$_zlaqr3 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & !! Aggressive early deflation: !! ZLAQR3: accepts as input an upper Hessenberg matrix !! H and performs an unitary similarity transformation !! designed to detect and deflate fully converged eigenvalues from !! a trailing principal submatrix. On output H has been over- !! written by a new Hessenberg matrix that is a perturbation of !! an unitary similarity transformation of H. It is to be !! hoped that the final version of H has many zero subdiagonal !! entries. ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,& nh, nv, nw integer(${ik}$), intent(out) :: nd, ns logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*) complex(${ck}$), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(${ck}$) :: beta, cdum, s, tau real(${ck}$) :: foo, safmax, safmin, smlnum, ulp integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, & lwk1, lwk2, lwk3, lwkopt, nmin ! Intrinsic Functions ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== estimate optimal workspace. ==== jw = min( nw, kbot-ktop+1 ) if( jw<=2_${ik}$ ) then lwkopt = 1_${ik}$ else ! ==== workspace query call to stdlib${ii}$_${ci}$gehrd ==== call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info ) lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ci}$unmhr ==== call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info ) lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== workspace query call to stdlib${ii}$_${ci}$laqr4 ==== call stdlib${ii}$_${ci}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, & infqr ) lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$) ! ==== optimal workspace ==== lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 ) end if ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) return end if ! ==== nothing to do ... ! ... for an empty active block ... ==== ns = 0_${ik}$ nd = 0_${ik}$ work( 1_${ik}$ ) = cone if( ktop>kbot )return ! ... nor for an empty deflation window. ==== if( nw<1 )return ! ==== machine constants ==== safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${ck}$) / ulp ) ! ==== setup deflation window ==== jw = min( nw, kbot-ktop+1 ) kwtop = kbot - jw + 1_${ik}$ if( kwtop==ktop ) then s = czero else s = h( kwtop, kwtop-1 ) end if if( kbot==kwtop ) then ! ==== 1-by-1 deflation window: not much to do ==== sh( kwtop ) = h( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero end if work( 1_${ik}$ ) = cone return end if ! ==== convert to spike-triangular form. (in case of a ! . rare qr failure, this routine continues to do ! . aggressive early deflation using that part of ! . the deflation window that converged using infqr ! . here and there to keep track.) ==== call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt ) call stdlib${ii}$_${ci}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 ) call stdlib${ii}$_${ci}$laset( 'A', jw, jw, czero, cone, v, ldv ) nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR3', 'SV', jw, 1_${ik}$, jw, lwork ) if( jw>nmin ) then call stdlib${ii}$_${ci}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & work, lwork, infqr ) else call stdlib${ii}$_${ci}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, & infqr ) end if ! ==== deflation detection loop ==== ns = jw ilst = infqr + 1_${ik}$ do knt = infqr + 1, jw ! ==== small spike tip deflation test ==== foo = cabs1( t( ns, ns ) ) if( foo==zero )foo = cabs1( s ) if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then ! ==== cone more converged eigenvalue ==== ns = ns - 1_${ik}$ else ! ==== cone undeflatable eigenvalue. move it up out of the ! . way. (stdlib${ii}$_${ci}$trexc can not fail in this case.) ==== ifst = ns call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) ilst = ilst + 1_${ik}$ end if end do ! ==== return to hessenberg form ==== if( ns==0_${ik}$ )s = czero if( ns<jw ) then ! ==== sorting the diagonal of t improves accuracy for ! . graded matrices. ==== do i = infqr + 1, ns ifst = i do j = i + 1, ns if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j end do ilst = i if( ifst/=ilst )call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info ) end do end if ! ==== restore shift/eigenvalue array from t ==== do i = infqr + 1, jw sh( kwtop+i-1 ) = t( i, i ) end do if( ns<jw .or. s==czero ) then if( ns>1_${ik}$ .and. s/=czero ) then ! ==== reflect spike back into lower triangle ==== call stdlib${ii}$_${ci}$copy( ns, v, ldv, work, 1_${ik}$ ) do i = 1, ns work( i ) = conjg( work( i ) ) end do beta = work( 1_${ik}$ ) call stdlib${ii}$_${ci}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau ) work( 1_${ik}$ ) = cone call stdlib${ii}$_${ci}$laset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt ) call stdlib${ii}$_${ci}$larf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ci}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) ) call stdlib${ii}$_${ci}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) ) call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info ) end if ! ==== copy updated reduced window into place ==== if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) ) call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh ) call stdlib${ii}$_${ci}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 ) ! ==== accumulate orthogonal matrix in order update ! . h and z, if requested. ==== if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, & v, ldv,work( jw+1 ), lwork-jw, info ) ! ==== update vertical slab in h ==== if( wantt ) then ltop = 1_${ik}$ else ltop = ktop end if do krow = ltop, kwtop - 1, nv kln = min( nv, kwtop-krow ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh ) end do ! ==== update horizontal slab in h ==== if( wantt ) then do kcol = kbot + 1, n, nh kln = min( nh, n-kcol+1 ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, & czero, t, ldt ) call stdlib${ii}$_${ci}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh ) end do end if ! ==== update vertical slab in z ==== if( wantz ) then do krow = iloz, ihiz, nv kln = min( nv, ihiz-krow+1 ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, & czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz ) end do end if end if ! ==== return the number of deflations ... ==== nd = jw - ns ! ==== ... and the number of shifts. (subtracting ! . infqr from the spike length takes care ! . of the case of a rare qr failure while ! . calculating eigenvalues of the deflation ! . window.) ==== ns = ns - infqr ! ==== return optimal workspace. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) end subroutine stdlib${ii}$_${ci}$laqr3 #:endif #:endfor module subroutine stdlib${ii}$_slaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! SLAQR4 implements one level of recursion for SLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by SLAQR0 and, for large enough !! deflation window size, it may be called by SLAQR3. This !! subroutine is identical to SLAQR0 except that it calls SLAQR2 !! instead of SLAQR3. !! SLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), z(ldz,*) real(sp), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(sp), parameter :: wilk1 = 0.75_sp real(sp), parameter :: wilk2 = -0.4375_sp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_slahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(sp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_slahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_slaqr2 ==== call stdlib${ii}$_slaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_slaqr5, stdlib${ii}$_slaqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=sp) return end if ! ==== stdlib${ii}$_slahqr/stdlib${ii}$_slaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_slaqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_slaqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_slahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_slahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks & ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=sp) end subroutine stdlib${ii}$_slaqr4 module subroutine stdlib${ii}$_dlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! DLAQR4 implements one level of recursion for DLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by DLAQR0 and, for large enough !! deflation window size, it may be called by DLAQR3. This !! subroutine is identical to DLAQR0 except that it calls DLAQR2 !! instead of DLAQR3. !! DLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), z(ldz,*) real(dp), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(dp), parameter :: wilk1 = 0.75_dp real(dp), parameter :: wilk2 = -0.4375_dp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_dlahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(dp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_dlahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_dlaqr2 ==== call stdlib${ii}$_dlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_dlaqr5, stdlib${ii}$_dlaqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=dp) return end if ! ==== stdlib${ii}$_dlahqr/stdlib${ii}$_dlaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_dlaqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_dlaqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_dlahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_dlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks & ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=dp) end subroutine stdlib${ii}$_dlaqr4 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] module subroutine stdlib${ii}$_${ri}$laqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,& !! DLAQR4: implements one level of recursion for DLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by DLAQR0 and, for large enough !! deflation window size, it may be called by DLAQR3. This !! subroutine is identical to DLAQR0 except that it calls DLAQR2 !! instead of DLAQR3. !! DLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the !! Schur form), and Z is the orthogonal matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input orthogonal !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*) real(${rk}$), intent(out) :: wi(*), work(*), wr(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(${rk}$), parameter :: wilk1 = 0.75_${rk}$ real(${rk}$), parameter :: wilk2 = -0.4375_${rk}$ ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_${ri}$lahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constants wilk1 and wilk2 are used to form the ! . exceptional shifts. ==== ! Local Scalars real(${rk}$) :: aa, bb, cc, cs, dd, sn, ss, swap integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2_${ik}$) :: jbcmpz ! Local Arrays real(${rk}$) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = one return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_${ri}$lahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, & ihiz, z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_${ri}$laqr2 ==== call stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_${ri}$laqr5, stdlib${ii}$_${ri}$laqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) return end if ! ==== stdlib${ii}$_${ri}$lahqr/stdlib${ii}$_${ri}$laqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_80: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 90 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==zero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,& work, lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_${ri}$laqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_${ri}$laqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, max( ks+1, ktop+2 ), -2 ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) ) aa = wilk1*ss + h( i, i ) bb = ss cc = wilk2*ss dd = aa call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i & ), cs, sn ) end do if( ks==ktop ) then wr( ks+1 ) = h( ks+1, ks+1 ) wi( ks+1 ) = zero wr( ks ) = wr( ks+1 ) wi( ks ) = wi( ks+1 ) end if else ! ==== got ns/2 or fewer shifts? use stdlib_${ri}$lahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_${ri}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_${ri}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks & ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. ==== if( ks>=kbot ) then aa = h( kbot-1, kbot-1 ) cc = h( kbot, kbot-1 ) bb = h( kbot-1, kbot ) dd = h( kbot, kbot ) call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( & kbot ),wi( kbot ), cs, sn ) ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ! . bubble sort keeps complex conjugate ! . pairs together. ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) & ) ) then sorted = .false. swap = wr( i ) wr( i ) = wr( i+1 ) wr( i+1 ) = swap swap = wi( i ) wi( i ) = wi( i+1 ) wi( i+1 ) = swap end if end do end do 60 continue end if ! ==== shuffle shifts into pairs of real shifts ! . and pairs of complex conjugate shifts ! . assuming complex conjugate shifts are ! . already adjacent to one another. (yes, ! . they are.) ==== do i = kbot, ks + 2, -2 if( wi( i )/=-wi( i-1 ) ) then swap = wr( i ) wr( i ) = wr( i-1 ) wr( i-1 ) = wr( i-2 ) wr( i-2 ) = swap swap = wi( i ) wi( i ) = wi( i-1 ) wi( i-1 ) = wi( i-2 ) wi( i-2 ) = swap end if end do end if ! ==== if there are only two shifts and both are ! . real, then use only one. ==== if( kbot-ks+1==2_${ik}$ ) then if( wi( kbot )==zero ) then if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) & ) ) then wr( kbot-1 ) = wr( kbot ) else wr( kbot ) = wr( kbot-1 ) end if end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping one to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )& , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, & nho, h( ku, kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_80 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 90 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$) end subroutine stdlib${ii}$_${ri}$laqr4 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! CLAQR4 implements one level of recursion for CLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by CLAQR0 and, for large enough !! deflation window size, it may be called by CLAQR3. This !! subroutine is identical to CLAQR0 except that it calls CLAQR2 !! instead of CLAQR3. !! CLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), z(ldz,*) complex(sp), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(sp), parameter :: wilk1 = 0.75_sp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_clahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(sp) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(sp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_clahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_claqr2 ==== call stdlib${ii}$_claqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_claqr5, stdlib${ii}$_claqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) return end if ! ==== stdlib${ii}$_clahqr/stdlib${ii}$_claqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_claqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_claqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_clahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_clahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )& , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp) end subroutine stdlib${ii}$_claqr4 pure module subroutine stdlib${ii}$_zlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! ZLAQR4 implements one level of recursion for ZLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by ZLAQR0 and, for large enough !! deflation window size, it may be called by ZLAQR3. This !! subroutine is identical to ZLAQR0 except that it calls ZLAQR2 !! instead of ZLAQR3. !! ZLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), z(ldz,*) complex(dp), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(dp), parameter :: wilk1 = 0.75_dp ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_zlahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(dp) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(dp) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_zlahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_zlaqr2 ==== call stdlib${ii}$_zlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_zlaqr5, stdlib${ii}$_zlaqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) return end if ! ==== stdlib${ii}$_zlahqr/stdlib${ii}$_zlaqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_zlaqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_zlaqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_zlahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_zlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )& , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp) end subroutine stdlib${ii}$_zlaqr4 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,& !! ZLAQR4: implements one level of recursion for ZLAQR0. !! It is a complete implementation of the small bulge multi-shift !! QR algorithm. It may be called by ZLAQR0 and, for large enough !! deflation window size, it may be called by ZLAQR3. This !! subroutine is identical to ZLAQR0 except that it calls ZLAQR2 !! instead of ZLAQR3. !! ZLAQR4 computes the eigenvalues of a Hessenberg matrix H !! and, optionally, the matrices T and Z from the Schur decomposition !! H = Z T Z**H, where T is an upper triangular matrix (the !! Schur form), and Z is the unitary matrix of Schur vectors. !! Optionally Z may be postmultiplied into an input unitary !! matrix Q so that this routine can give the Schur factorization !! of a matrix A which has been reduced to the Hessenberg form H !! by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. lwork, info ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n integer(${ik}$), intent(out) :: info logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*) complex(${ck}$), intent(out) :: w(*), work(*) ! ================================================================ ! Parameters integer(${ik}$), parameter :: ntiny = 15_${ik}$ integer(${ik}$), parameter :: kexnw = 5_${ik}$ integer(${ik}$), parameter :: kexsh = 6_${ik}$ real(${ck}$), parameter :: wilk1 = 0.75_${ck}$ ! ==== matrices of order ntiny or smaller must be processed by ! . stdlib${ii}$_${ci}$lahqr because of insufficient subdiagonal scratch space. ! . (this is a hard limit.) ==== ! ==== exceptional deflation windows: try to cure rare ! . slow convergence by varying the size of the ! . deflation window after kexnw iterations. ==== ! ==== exceptional shifts: try to cure rare slow convergence ! . with ad-hoc exceptional shifts every kexsh iterations. ! . ==== ! ==== the constant wilk1 is used to form the exceptional ! . shifts. ==== ! Local Scalars complex(${ck}$) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2 real(${ck}$) :: s integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, & kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,& nwmax, nwr, nwupbd logical(lk) :: sorted character(len=2) :: jbcmpz ! Local Arrays complex(${ck}$) :: zdum(1_${ik}$,1_${ik}$) ! Intrinsic Functions ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements info = 0_${ik}$ ! ==== quick return for n = 0: nothing to do. ==== if( n==0_${ik}$ ) then work( 1_${ik}$ ) = cone return end if if( n<=ntiny ) then ! ==== tiny matrices must use stdlib_${ci}$lahqr. ==== lwkopt = 1_${ik}$ if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, & z, ldz, info ) else ! ==== use small bulge multi-shift qr with aggressive early ! . deflation on larger-than-tiny matrices. ==== ! ==== hope for the best. ==== info = 0_${ik}$ ! ==== set up job flags for stdlib${ii}$_ilaenv. ==== if( wantt ) then jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S' else jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E' end if if( wantz ) then jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V' else jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N' end if ! ==== nwr = recommended deflation window size. at this ! . point, n > ntiny = 15, so there is enough ! . subdiagonal workspace for nwr>=2 as required. ! . (in fact, there is enough subdiagonal space for ! . nwr>=4.) ==== nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) ! ==== nsr = recommended number of simultaneous shifts. ! . at this point n > ntiny = 15, so there is at ! . enough subdiagonal workspace for nsr to be even ! . and greater than or equal to two as required. ==== nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) ! ==== estimate optimal workspace ==== ! ==== workspace query call to stdlib${ii}$_${ci}$laqr2 ==== call stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,& ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ ) ! ==== optimal workspace = max(stdlib${ii}$_${ci}$laqr5, stdlib${ii}$_${ci}$laqr2) ==== lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) ) ! ==== quick return in case of workspace query. ==== if( lwork==-1_${ik}$ ) then work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) return end if ! ==== stdlib${ii}$_${ci}$lahqr/stdlib${ii}$_${ci}$laqr0 crossover point ==== nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nmin = max( ntiny, nmin ) ! ==== nibble crossover point ==== nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) nibble = max( 0_${ik}$, nibble ) ! ==== accumulate reflections during ttswp? use block ! . 2-by-2 structure during matrix-matrix multiply? ==== kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork ) kacc22 = max( 0_${ik}$, kacc22 ) kacc22 = min( 2_${ik}$, kacc22 ) ! ==== nwmax = the largest possible deflation window for ! . which there is sufficient workspace. ==== nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ ) nw = nwmax ! ==== nsmax = the largest number of simultaneous shifts ! . for which there is sufficient workspace. ==== nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ ) nsmax = nsmax - mod( nsmax, 2_${ik}$ ) ! ==== ndfl: an iteration count restarted at deflation. ==== ndfl = 1_${ik}$ ! ==== itmax = iteration limit ==== itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) ) ! ==== last row and column in the active block ==== kbot = ihi ! ==== main loop ==== loop_70: do it = 1, itmax ! ==== done when kbot falls below ilo ==== if( kbot<ilo )go to 80 ! ==== locate active block ==== do k = kbot, ilo + 1, -1 if( h( k, k-1 )==czero )go to 20 end do k = ilo 20 continue ktop = k ! ==== select deflation window size: ! . typical case: ! . if possible and advisable, nibble the entire ! . active block. if not, use size min(nwr,nwmax) ! . or min(nwr+1,nwmax) depending upon which has ! . the smaller corresponding subdiagonal entry ! . (a heuristic). ! . exceptional case: ! . if there have been no deflations in kexnw or ! . more iterations, then vary the deflation window ! . size. at first, because, larger windows are, ! . in general, more powerful than smaller ones, ! . rapidly increase the window to the maximum possible. ! . then, gradually reduce the window size. ==== nh = kbot - ktop + 1_${ik}$ nwupbd = min( nh, nwmax ) if( ndfl<kexnw ) then nw = min( nwupbd, nwr ) else nw = min( nwupbd, 2_${ik}$*nw ) end if if( nw<nwmax ) then if( nw>=nh-1 ) then nw = nh else kwtop = kbot - nw + 1_${ik}$ if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + & 1_${ik}$ end if end if if( ndfl<kexnw ) then ndec = -1_${ik}$ else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then ndec = ndec + 1_${ik}$ if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$ nw = nw - ndec end if ! ==== aggressive early deflation: ! . split workspace under the subdiagonal into ! . - an nw-by-nw work array v in the lower ! . left-hand-corner, ! . - an nw-by-at-least-nw-but-more-is-better ! . (nw-by-nho) horizontal work array along ! . the bottom edge, ! . - an at-least-nw-but-more-is-better (nhv-by-nw) ! . vertical work array along the left-hand-edge. ! . ==== kv = n - nw + 1_${ik}$ kt = nw + 1_${ik}$ nho = ( n-nw-1 ) - kt + 1_${ik}$ kwv = nw + 2_${ik}$ nve = ( n-nw ) - kwv + 1_${ik}$ ! ==== aggressive early deflation ==== call stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, & ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,& lwork ) ! ==== adjust kbot accounting for new deflations. ==== kbot = kbot - ld ! ==== ks points to the shifts. ==== ks = kbot - ls + 1_${ik}$ ! ==== skip an expensive qr sweep if there is a (partly ! . heuristic) reason to expect that many eigenvalues ! . will deflate without it. here, the qr sweep is ! . skipped if many eigenvalues have just been deflated ! . or if the remaining active block is small. if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )& ) ) ) then ! ==== ns = nominal number of simultaneous shifts. ! . this may be lowered (slightly) if stdlib${ii}$_${ci}$laqr2 ! . did not provide that many shifts. ==== ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) ) ns = ns - mod( ns, 2_${ik}$ ) ! ==== if there have been no deflations ! . in a multiple of kexsh iterations, ! . then try exceptional shifts. ! . otherwise use shifts provided by ! . stdlib${ii}$_${ci}$laqr2 above or from the eigenvalues ! . of a trailing principal submatrix. ==== if( mod( ndfl, kexsh )==0_${ik}$ ) then ks = kbot - ns + 1_${ik}$ do i = kbot, ks + 1, -2 w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) ) w( i-1 ) = w( i ) end do else ! ==== got ns/2 or fewer shifts? use stdlib_${ci}$lahqr ! . on a trailing principal submatrix to ! . get more. (since ns<=nsmax<=(n-3)/6, ! . there is enough space below the subdiagonal ! . to fit an ns-by-ns scratch array.) ==== if( kbot-ks+1<=ns / 2_${ik}$ ) then ks = kbot - ns + 1_${ik}$ kt = n - ns + 1_${ik}$ call stdlib${ii}$_${ci}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh ) call stdlib${ii}$_${ci}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )& , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf ) ks = ks + inf ! ==== in case of a rare qr failure use ! . eigenvalues of the trailing 2-by-2 ! . principal submatrix. scale to avoid ! . overflows, underflows and subnormals. ! . (the scale factor s can not be czero, ! . because h(kbot,kbot-1) is nonzero.) ==== if( ks>=kbot ) then s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( & h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) ) aa = h( kbot-1, kbot-1 ) / s cc = h( kbot, kbot-1 ) / s bb = h( kbot-1, kbot ) / s dd = h( kbot, kbot ) / s tr2 = ( aa+dd ) / two det = ( aa-tr2 )*( dd-tr2 ) - bb*cc rtdisc = sqrt( -det ) w( kbot-1 ) = ( tr2+rtdisc )*s w( kbot ) = ( tr2-rtdisc )*s ks = kbot - 1_${ik}$ end if end if if( kbot-ks+1>ns ) then ! ==== sort the shifts (helps a little) ==== sorted = .false. do k = kbot, ks + 1, -1 if( sorted )go to 60 sorted = .true. do i = ks, k - 1 if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then sorted = .false. swap = w( i ) w( i ) = w( i+1 ) w( i+1 ) = swap end if end do end do 60 continue end if end if ! ==== if there are only two shifts, then use ! . only cone. ==== if( kbot-ks+1==2_${ik}$ ) then if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )& ) then w( kbot-1 ) = w( kbot ) else w( kbot ) = w( kbot-1 ) end if end if ! ==== use up to ns of the the smallest magnitude ! . shifts. if there aren't ns shifts available, ! . then use them all, possibly dropping cone to ! . make the number of shifts even. ==== ns = min( ns, kbot-ks+1 ) ns = ns - mod( ns, 2_${ik}$ ) ks = kbot - ns + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep: ! . split workspace under the subdiagonal into ! . - a kdu-by-kdu work array u in the lower ! . left-hand-corner, ! . - a kdu-by-at-least-kdu-but-more-is-better ! . (kdu-by-nho) horizontal work array wh along ! . the bottom edge, ! . - and an at-least-kdu-but-more-is-better-by-kdu ! . (nve-by-kdu) vertical work wv arrow along ! . the left-hand-edge. ==== kdu = 2_${ik}$*ns ku = n - kdu + 1_${ik}$ kwh = kdu + 1_${ik}$ nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$ kwv = kdu + 4_${ik}$ nve = n - kdu - kwv + 1_${ik}$ ! ==== small-bulge multi-shift qr sweep ==== call stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, & iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,& kwh ), ldh ) end if ! ==== note progress (or the lack of it). ==== if( ld>0_${ik}$ ) then ndfl = 1_${ik}$ else ndfl = ndfl + 1_${ik}$ end if ! ==== end of main loop ==== end do loop_70 ! ==== iteration limit exceeded. set info to show where ! . the problem occurred and exit. ==== info = kbot 80 continue end if ! ==== return the optimal value of lwork. ==== work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$) end subroutine stdlib${ii}$_${ci}$laqr4 #:endif #:endfor pure module subroutine stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & !! SLAQR5 , called by SLAQR0, performs a !! single small-bulge multi-shift QR sweep. iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(sp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) real(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(sp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& tst1, tst2, ulp integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays real(sp) :: vt(3_${ik}$) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== shuffle shifts into pairs of real shifts and pairs ! . of complex conjugate shifts assuming complex ! . conjugate shifts are already adjacent to one ! . another. ==== do i = 1, nshfts - 2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap end if end do ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by one. the shuffle above ! . ensures that the dropped shift is real and that ! . the remaining shifts are paired. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = zero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_slaset( 'ALL', kdu, kdu, zero, one, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps-1 columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . one column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_slaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(& 2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_slarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_slarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = zero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop ) then if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_slaqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_slarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually zero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m ) h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m ) ! ==== calculate reflection to move ! . mth bulge one step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_slarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new one. ==== call stdlib${ii}$_slaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_slarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) ) if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( & h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old one with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new one. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = zero h( k+3, k ) = zero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m ) h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, & k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m ) u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m ) z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & jcol ), ldh, zero, wh,ldwh ) call stdlib${ii}$_slacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_sgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_sgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_slacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_slaqr5 pure module subroutine stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & !! DLAQR5 , called by DLAQR0, performs a !! single small-bulge multi-shift QR sweep. iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(dp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) real(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(dp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& tst1, tst2, ulp integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays real(dp) :: vt(3_${ik}$) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== shuffle shifts into pairs of real shifts and pairs ! . of complex conjugate shifts assuming complex ! . conjugate shifts are already adjacent to one ! . another. ==== do i = 1, nshfts - 2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap end if end do ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by one. the shuffle above ! . ensures that the dropped shift is real and that ! . the remaining shifts are paired. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = zero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_dlaset( 'ALL', kdu, kdu, zero, one, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . one column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_dlaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(& 2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_dlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_dlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = zero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop ) then if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_dlaqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_dlarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually zero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m ) h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m ) ! ==== calculate reflection to move ! . mth bulge one step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_dlarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new one. ==== call stdlib${ii}$_dlaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_dlarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) ) if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( & h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old one with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new one. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = zero h( k+3, k ) = zero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m ) h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, & k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m ) u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m ) z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & jcol ), ldh, zero, wh,ldwh ) call stdlib${ii}$_dlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_dgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_dgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_dlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_dlaqr5 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, & !! DLAQR5:, called by DLAQR0, performs a !! single small-bulge multi-shift QR sweep. iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments real(${rk}$), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*) real(${rk}$), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Local Scalars real(${rk}$) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,& tst1, tst2, ulp integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, & krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays real(${rk}$) :: vt(3_${ik}$) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== shuffle shifts into pairs of real shifts and pairs ! . of complex conjugate shifts assuming complex ! . conjugate shifts are already adjacent to one ! . another. ==== do i = 1, nshfts - 2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap end if end do ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by one. the shuffle above ! . ensures that the dropped shift is real and that ! . the remaining shifts are paired. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${rk}$) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = zero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_${ri}$laset( 'ALL', kdu, kdu, zero, one, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . one column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_${ri}$laqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(& 2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_${ri}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_${ri}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = zero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop ) then if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_${ri}$laqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_${ri}$larfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually zero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m ) h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m ) ! ==== calculate reflection to move ! . mth bulge one step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_${ri}$larfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) & then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new one. ==== call stdlib${ii}$_${ri}$laqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( & 2_${ik}$*m ), si( 2_${ik}$*m ),vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_${ri}$larfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) ) if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( & h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old one with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = zero h( k+3, k ) = zero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new one. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = zero h( k+3, k ) = zero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m ) h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, & k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is zero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=zero ) then tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) ) end if if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) ) h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) & then h( k+1, k ) = zero end if end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m ) u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m ) z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_${ri}$gemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, & jcol ), ldh, zero, wh,ldwh ) call stdlib${ii}$_${ri}$lacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(& k1, k1 ),ldu, zero, wv, ldwv ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_${ri}$laqr5 #:endif #:endfor pure module subroutine stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & !! CLAQR5 called by CLAQR0 performs a !! single small-bulge multi-shift QR sweep. ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(sp), intent(inout) :: h(ldh,*), s(*), z(ldz,*) complex(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(sp) :: alpha, beta, cdum, refsum real(sp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays complex(sp) :: vt(3_${ik}$) ! Statement Functions real(sp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by cone. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = czero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_claset( 'ALL', kdu, kdu, czero, cone, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . cone column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_claqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, & m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_clarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_clarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = czero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop) then if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & k+1, k ) = czero end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_claqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )& ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_clarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually czero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) ) h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) ) ! ==== calculate reflection to move ! . mth bulge cone step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_clarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & ) then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new cone. ==== call stdlib${ii}$_claqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_clarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) ) if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( & cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old cone with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new cone. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = czero h( k+3, k ) = czero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+& conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& k ) = czero end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+& conjg( v( 3_${ik}$, m ) )*h( k+3, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_cgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& jcol ), ldh, czero, wh,ldwh ) call stdlib${ii}$_clacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_cgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_cgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & u( k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_clacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_claqr5 pure module subroutine stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & !! ZLAQR5 , called by ZLAQR0, performs a !! single small-bulge multi-shift QR sweep. ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(dp), intent(inout) :: h(ldh,*), s(*), z(ldz,*) complex(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(dp) :: alpha, beta, cdum, refsum real(dp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays complex(dp) :: vt(3_${ik}$) ! Statement Functions real(dp) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by cone. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = czero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_zlaset( 'ALL', kdu, kdu, czero, cone, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . cone column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_zlaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, & m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_zlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_zlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = czero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop ) then if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & k+1, k ) = czero end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_zlaqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )& ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_zlarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually czero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) ) h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) ) ! ==== calculate reflection to move ! . mth bulge cone step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_zlarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & ) then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new cone. ==== call stdlib${ii}$_zlaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_zlarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) ) if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( & cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old cone with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new cone. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = czero h( k+3, k ) = czero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+& conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& k ) = czero end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+& conjg( v( 3_${ik}$, m ) )*h( k+3, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_zgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& jcol ), ldh, czero, wh,ldwh ) call stdlib${ii}$_zlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_zgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_zgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & u( k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_zlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_zlaqr5 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, & !! ZLAQR5:, called by ZLAQR0, performs a !! single small-bulge multi-shift QR sweep. ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh ) ! -- lapack auxiliary routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, & ldz, n, nh, nshfts, nv logical(lk), intent(in) :: wantt, wantz ! Array Arguments complex(${ck}$), intent(inout) :: h(ldh,*), s(*), z(ldz,*) complex(${ck}$), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*) ! ================================================================ ! Parameters ! Local Scalars complex(${ck}$) :: alpha, beta, cdum, refsum real(${ck}$) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,& m, m22, mbot, mtop, nbmps, ndcol, ns, nu logical(lk) :: accum, bmp22 ! Intrinsic Functions ! Local Arrays complex(${ck}$) :: vt(3_${ik}$) ! Statement Functions real(${ck}$) :: cabs1 ! Statement Function Definitions cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) ) ! Executable Statements ! ==== if there are no shifts, then there is nothing to do. ==== if( nshfts<2 )return ! ==== if the active block is empty or 1-by-1, then there ! . is nothing to do. ==== if( ktop>=kbot )return ! ==== nshfts is supposed to be even, but if it is odd, ! . then simply reduce it by cone. ==== ns = nshfts - mod( nshfts, 2_${ik}$ ) ! ==== machine constants for deflation ==== safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one / safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${ck}$) / ulp ) ! ==== use accumulated reflections to update far-from-diagonal ! . entries ? ==== accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ ) ! ==== clear trash ==== if( ktop+2<=kbot )h( ktop+2, ktop ) = czero ! ==== nbmps = number of 2-shift bulges in the chain ==== nbmps = ns / 2_${ik}$ ! ==== kdu = width of slab ==== kdu = 4_${ik}$*nbmps ! ==== create and chase chains of nbmps bulges ==== loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps ! jtop = index from which updates from the right start. if( accum ) then jtop = max( ktop, incol ) else if( wantt ) then jtop = 1_${ik}$ else jtop = ktop end if ndcol = incol + kdu if( accum )call stdlib${ii}$_${ci}$laset( 'ALL', kdu, kdu, czero, cone, u, ldu ) ! ==== near-the-diagonal bulge chase. the following loop ! . performs the near-the-diagonal part of a small bulge ! . multi-shift qr sweep. each 4*nbmps column diagonal ! . chunk extends from column incol to column ndcol ! . (including both column incol and column ndcol). the ! . following loop chases a 2*nbmps+1 column long chain of ! . nbmps bulges 2*nbmps columns to the right. (incol ! . may be less than ktop and and ndcol may be greater than ! . kbot indicating phantom columns from which to chase ! . bulges before they are actually introduced or to which ! . to chase bulges beyond column kbot.) ==== loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 ) ! ==== bulges number mtop to mbot are active double implicit ! . shift bulges. there may or may not also be small ! . 2-by-2 bulge, if there is room. the inactive bulges ! . (if any) must wait until the active bulges have moved ! . down the diagonal to make room. the phantom matrix ! . paradigm described above helps keep track. ==== mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 ) mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ ) m22 = mbot + 1_${ik}$ bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 ) ! ==== generate reflections to chase the chain right ! . cone column. (the minimum value of k is ktop-1.) ==== if ( bmp22 ) then ! ==== special case: 2-by-2 reflection at bottom treated ! . separately ==== k = krcol + 2_${ik}$*( m22-1 ) if( k==ktop-1 ) then call stdlib${ii}$_${ci}$laqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, & m22 ) ) beta = v( 1_${ik}$, m22 ) call stdlib${ii}$_${ci}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) else beta = h( k+1, k ) v( 2_${ik}$, m22 ) = h( k+2, k ) call stdlib${ii}$_${ci}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) ) h( k+1, k ) = beta h( k+2, k ) = czero end if ! ==== perform update from right within ! . computational window. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do ! ==== perform update from left within ! . computational window. ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do j = k+1, jbot refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j & ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 ) end do ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k>=ktop ) then if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( & k+1, k ) = czero end if end if end if ! ==== accumulate orthogonal transformations. ==== if( accum ) then kms = k - incol do j = max( 1, ktop-incol ), kdu refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do else if( wantz ) then do j = iloz, ihiz refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) ) end do end if end if ! ==== normal case: chain of 3-by-3 reflections ==== loop_80: do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) if( k==ktop-1 ) then call stdlib${ii}$_${ci}$laqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )& ) alpha = v( 1_${ik}$, m ) call stdlib${ii}$_${ci}$larfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) else ! ==== perform delayed transformation of row below ! . mth bulge. exploit fact that first two elements ! . of row are actually czero. ==== refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 ) h( k+3, k ) = -refsum h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) ) h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) ) ! ==== calculate reflection to move ! . mth bulge cone step. ==== beta = h( k+1, k ) v( 2_${ik}$, m ) = h( k+2, k ) v( 3_${ik}$, m ) = h( k+3, k ) call stdlib${ii}$_${ci}$larfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) ) ! ==== a bulge may collapse because of vigilant ! . deflation or destructive underflow. in the ! . underflow case, try the two-small-subdiagonals ! . trick to try to reinflate the bulge. ==== if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero & ) then ! ==== typical case: not collapsed (yet). ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== atypical case: collapsed. attempt to ! . reintroduce ignoring h(k+1,k) and h(k+2,k). ! . if the fill resulting from the new ! . reflector is too large, then abandon it. ! . otherwise, use the new cone. ==== call stdlib${ii}$_${ci}$laqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt ) alpha = vt( 1_${ik}$ ) call stdlib${ii}$_${ci}$larfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) ) refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) ) if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( & cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) & then ! ==== starting a new bulge here would ! . create non-negligible fill. use ! . the old cone with trepidation. ==== h( k+1, k ) = beta h( k+2, k ) = czero h( k+3, k ) = czero else ! ==== starting a new bulge here would ! . create only negligible fill. ! . replace the old reflector with ! . the new cone. ==== h( k+1, k ) = h( k+1, k ) - refsum h( k+2, k ) = czero h( k+3, k ) = czero v( 1_${ik}$, m ) = vt( 1_${ik}$ ) v( 2_${ik}$, m ) = vt( 2_${ik}$ ) v( 3_${ik}$, m ) = vt( 3_${ik}$ ) end if end if end if ! ==== apply reflection from the right and ! . the first column of update from the left. ! . these updates are required for the vigilant ! . deflation check. we still delay most of the ! . updates from the left for efficiency. ==== do j = jtop, min( kbot, k+3 ) refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 & ) ) h( j, k+1 ) = h( j, k+1 ) - refsum h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do ! ==== perform update from left for subsequent ! . column. ==== refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+& conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) ) h( k+1, k+1 ) = h( k+1, k+1 ) - refsum h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m ) h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m ) ! ==== the following convergence test requires that ! . the tradition small-compared-to-nearby-diagonals ! . criterion and the ahues ! . criteria both be satisfied. the latter improves ! . accuracy in some examples. falling back on an ! . alternate convergence criterion when tst1 or tst2 ! . is czero (as done here) is traditional but probably ! . unnecessary. ==== if( k<ktop)cycle if( h( k+1, k )/=czero ) then tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) ) if( tst1==zero ) then if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) ) if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) ) if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) ) if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) ) if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) ) if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) ) end if if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) ) h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) ) scl = h11 + h12 tst2 = h22*( h11 / scl ) if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,& k ) = czero end if end if end do loop_80 ! ==== multiply h by reflections from the left ==== if( accum ) then jbot = min( ndcol, kbot ) else if( wantt ) then jbot = n else jbot = kbot end if do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = max( ktop, krcol + 2*m ), jbot refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+& conjg( v( 3_${ik}$, m ) )*h( k+3, j ) ) h( k+1, j ) = h( k+1, j ) - refsum h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m ) h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m ) end do end do ! ==== accumulate orthogonal transformations. ==== if( accum ) then ! ==== accumulate u. (if needed, update z later ! . with an efficient matrix-matrix ! . multiply.) ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) kms = k - incol i2 = max( 1_${ik}$, ktop-incol ) i2 = max( i2, kms-(krcol-incol)+1_${ik}$ ) i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ ) do j = i2, i4 refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( & j, kms+3 ) ) u( j, kms+1 ) = u( j, kms+1 ) - refsum u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do else if( wantz ) then ! ==== u is not accumulated, so update z ! . now by multiplying by reflections ! . from the right. ==== do m = mbot, mtop, -1 k = krcol + 2_${ik}$*( m-1 ) do j = iloz, ihiz refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, & k+3 ) ) z( j, k+1 ) = z( j, k+1 ) - refsum z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) ) z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) ) end do end do end if ! ==== end of near-the-diagonal bulge chase. ==== end do loop_145 ! ==== use u (if accumulated) to update far-from-diagonal ! . entries in h. if required, use u to update z as ! . well. ==== if( accum ) then if( wantt ) then jtop = 1_${ik}$ jbot = n else jtop = ktop jbot = kbot end if k1 = max( 1_${ik}$, ktop-incol ) nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$ ! ==== horizontal multiply ==== do jcol = min( ndcol, kbot ) + 1, jbot, nh jlen = min( nh, jbot-jcol+1 ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,& jcol ), ldh, czero, wh,ldwh ) call stdlib${ii}$_${ci}$lacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh ) end do ! ==== vertical multiply ==== do jrow = jtop, max( ktop, incol ) - 1, nv jlen = min( nv, max( ktop, incol )-jrow ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( & k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh ) end do ! ==== z multiply (also vertical) ==== if( wantz ) then do jrow = iloz, ihiz, nv jlen = min( nv, ihiz-jrow+1 ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, & u( k1, k1 ),ldu, czero, wv, ldwv ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz ) end do end if end if end do loop_180 end subroutine stdlib${ii}$_${ci}$laqr5 #:endif #:endfor recursive module subroutine stdlib${ii}$_slaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & !! SLAQZ0 computes the eigenvalues of a real matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a real matrix pair (A,B): !! A = Q1*H*Z1**T, B = Q1*T*Z1**T, !! as computed by SGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**T, T = Q*P*Z**T, !! where Q and Z are orthogonal matrices, P is an upper triangular !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 !! diagonal blocks. !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of !! eigenvalues. !! Additionally, the 2-by-2 upper triangular diagonal blocks of P !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, !! P(j,j) > 0, and P(j+1,j+1) > 0. !! Optionally, the orthogonal matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! orthogonal matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Real eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(& * ), alphai( * ), beta( * ), work( * ) ! ================================================================ ! local scalars real(sp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost, i logical(lk) :: ilschur, ilq, ilz character(len=3_${ik}$) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=sp) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=sp)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_shgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& beta, q, ldq, z, ldz, work,lwork, info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_slaqz3 nw = max( nwr, nmin ) call stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_slaqz4 call stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=sp) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, q, ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, z, ldz ) ! get machine constants safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp)/ulp ) istart = ilo istop = ihi maxit = 3_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& abs( a( istop-2,istop-2 ) ) ) ) ) then a( istop-1, istop-2 ) = zero istop = istop-2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& abs( a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = zero istop = istop-1 ld = 0_${ik}$ eshift = zero end if ! check deflations at the start if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then a( istart+2, istart+1 ) = zero istart = istart+2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& )+abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = zero istart = istart+1 ld = 0_${ik}$ eshift = zero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = zero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) temp = zero if( k < istop ) then temp = temp+abs( b( k, k+1 ) ) end if if( k > istart2 ) then temp = temp+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_slartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = zero call stdlib${ii}$_srot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_srot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_slartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = zero call stdlib${ii}$_srot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_srot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_srot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 ) end if end if end do if( istart2<istop )then call stdlib${ii}$_slartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = zero call stdlib${ii}$_srot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_srot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_srot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = zero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using qz_small because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),& nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info ) if ( n_deflated > 0_${ik}$ ) then istop = istop-n_deflated ld = 0_${ik}$ eshift = zero end if if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_deflated-n_undeflated+1 ! shuffle shifts to put double shifts in front ! this ensures that we don't split up a double shift do i = shiftpos, shiftpos+n_undeflated-1, 2 if( alphai( i )/=-alphai( i+1 ) ) then swap = alphar( i ) alphar( i ) = alphar( i+1 ) alphar( i+1 ) = alphar( i+2 ) alphar( i+2 ) = swap swap = alphai( i ) alphai( i ) = alphai( i+1 ) alphai( i+1 ) = alphai( i+2 ) alphai( i+2 ) = swap swap = beta( i ) beta( i ) = beta( i+1 ) beta( i+1 ) = beta( i+2 ) beta( i+2 ) = swap end if end do if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+one/( safmin*real( maxit,KIND=sp) ) end if alphar( shiftpos ) = one alphar( shiftpos+1 ) = zero alphai( shiftpos ) = zero alphai( shiftpos+1 ) = zero beta( shiftpos ) = eshift beta( shiftpos+1 ) = eshift ns = 2_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( & shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,& work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,& sweep_info ) end do ! call stdlib${ii}$_shgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_shgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 continue call stdlib${ii}$_shgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & beta, q, ldq, z, ldz, work, lwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_slaqz0 recursive module subroutine stdlib${ii}$_dlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & !! DLAQZ0 computes the eigenvalues of a real matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a real matrix pair (A,B): !! A = Q1*H*Z1**T, B = Q1*T*Z1**T, !! as computed by DGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**T, T = Q*P*Z**T, !! where Q and Z are orthogonal matrices, P is an upper triangular !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 !! diagonal blocks. !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of !! eigenvalues. !! Additionally, the 2-by-2 upper triangular diagonal blocks of P !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, !! P(j,j) > 0, and P(j+1,j+1) > 0. !! Optionally, the orthogonal matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! orthogonal matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Real eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& * ),alphai( * ), beta( * ), work( * ) ! ================================================================ ! local scalars real(dp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost, i logical(lk) :: ilschur, ilq, ilz character(len=3_${ik}$) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=dp) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=dp)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_dhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& beta, q, ldq, z, ldz, work,lwork, info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_dlaqz3 nw = max( nwr, nmin ) call stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_dlaqz4 call stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=dp) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, q, ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, z, ldz ) ! get machine constants safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp)/ulp ) istart = ilo istop = ihi maxit = 3_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& abs( a( istop-2,istop-2 ) ) ) ) ) then a( istop-1, istop-2 ) = zero istop = istop-2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& abs( a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = zero istop = istop-1 ld = 0_${ik}$ eshift = zero end if ! check deflations at the start if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then a( istart+2, istart+1 ) = zero istart = istart+2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& )+abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = zero istart = istart+1 ld = 0_${ik}$ eshift = zero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = zero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) temp = zero if( k < istop ) then temp = temp+abs( b( k, k+1 ) ) end if if( k > istart2 ) then temp = temp+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_dlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = zero call stdlib${ii}$_drot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_drot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_dlartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = zero call stdlib${ii}$_drot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_drot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_drot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 ) end if end if end do if( istart2<istop )then call stdlib${ii}$_dlartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = zero call stdlib${ii}$_drot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_drot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_drot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = zero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using stdlib${ii}$_dhgeqz because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),& nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info ) if ( n_deflated > 0_${ik}$ ) then istop = istop-n_deflated ld = 0_${ik}$ eshift = zero end if if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_deflated-n_undeflated+1 ! shuffle shifts to put double shifts in front ! this ensures that we don't split up a double shift do i = shiftpos, shiftpos+n_undeflated-1, 2 if( alphai( i )/=-alphai( i+1 ) ) then swap = alphar( i ) alphar( i ) = alphar( i+1 ) alphar( i+1 ) = alphar( i+2 ) alphar( i+2 ) = swap swap = alphai( i ) alphai( i ) = alphai( i+1 ) alphai( i+1 ) = alphai( i+2 ) alphai( i+2 ) = swap swap = beta( i ) beta( i ) = beta( i+1 ) beta( i+1 ) = beta( i+2 ) beta( i+2 ) = swap end if end do if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+one/( safmin*real( maxit,KIND=dp) ) end if alphar( shiftpos ) = one alphar( shiftpos+1 ) = zero alphai( shiftpos ) = zero alphai( shiftpos+1 ) = zero beta( shiftpos ) = eshift beta( shiftpos+1 ) = eshift ns = 2_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( & shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,& work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,& sweep_info ) end do ! call stdlib${ii}$_dhgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_dhgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 call stdlib${ii}$_dhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & beta, q, ldq, z, ldz, work, lwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_dlaqz0 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] recursive module subroutine stdlib${ii}$_${ri}$laqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, & !! DLAQZ0: computes the eigenvalues of a real matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a real matrix pair (A,B): !! A = Q1*H*Z1**T, B = Q1*T*Z1**T, !! as computed by DGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**T, T = Q*P*Z**T, !! where Q and Z are orthogonal matrices, P is an upper triangular !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 !! diagonal blocks. !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of !! eigenvalues. !! Additionally, the 2-by-2 upper triangular diagonal blocks of P !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, !! P(j,j) > 0, and P(j+1,j+1) > 0. !! Optionally, the orthogonal matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! orthogonal matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Real eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" alphai, beta,q, ldq, z, ldz, work, lwork, rec,info ) use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& * ),alphai( * ), beta( * ), work( * ) ! ================================================================ ! local scalars real(${rk}$) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost, i logical(lk) :: ilschur, ilq, ilz character(len=3_${ik}$) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=${rk}$) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=${rk}$)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_${ri}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,& beta, q, ldq, z, ldz, work,lwork, info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_${ri}$laqz3 nw = max( nwr, nmin ) call stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_qeflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_${ri}$laqz4 call stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, & lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=${rk}$) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, q, ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, z, ldz ) ! get machine constants safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${rk}$)/ulp ) istart = ilo istop = ihi maxit = 3_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+& abs( a( istop-2,istop-2 ) ) ) ) ) then a( istop-1, istop-2 ) = zero istop = istop-2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+& abs( a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = zero istop = istop-1 ld = 0_${ik}$ eshift = zero end if ! check deflations at the start if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 & ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then a( istart+2, istart+1 ) = zero istart = istart+2 ld = 0_${ik}$ eshift = zero else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )& )+abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = zero istart = istart+1 ld = 0_${ik}$ eshift = zero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = zero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) temp = zero if( k < istop ) then temp = temp+abs( b( k, k+1 ) ) end if if( k > istart2 ) then temp = temp+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_${ri}$lartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = zero call stdlib${ii}$_${ri}$rot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_${ri}$rot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_${ri}$lartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = zero call stdlib${ii}$_${ri}$rot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_${ri}$rot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 ) end if end if end do if( istart2<istop )then call stdlib${ii}$_${ri}$lartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = zero call stdlib${ii}$_${ri}$rot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_${ri}$rot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = zero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using stdlib${ii}$_${ri}$hgeqz because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_qeflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),& nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info ) if ( n_qeflated > 0_${ik}$ ) then istop = istop-n_qeflated ld = 0_${ik}$ eshift = zero end if if ( 100_${ik}$*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_qeflated-n_undeflated+1 ! shuffle shifts to put double shifts in front ! this ensures that we don't split up a double shift do i = shiftpos, shiftpos+n_undeflated-1, 2 if( alphai( i )/=-alphai( i+1 ) ) then swap = alphar( i ) alphar( i ) = alphar( i+1 ) alphar( i+1 ) = alphar( i+2 ) alphar( i+2 ) = swap swap = alphai( i ) alphai( i ) = alphai( i+1 ) alphai( i+1 ) = alphai( i+2 ) alphai( i+2 ) = swap swap = beta( i ) beta( i ) = beta( i+1 ) beta( i+1 ) = beta( i+2 ) beta( i+2 ) = swap end if end do if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=${rk}$)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+one/( safmin*real( maxit,KIND=${rk}$) ) end if alphar( shiftpos ) = one alphar( shiftpos+1 ) = zero alphai( shiftpos ) = zero alphai( shiftpos+1 ) = zero beta( shiftpos ) = eshift beta( shiftpos+1 ) = eshift ns = 2_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( & shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,& work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,& sweep_info ) end do ! call stdlib${ii}$_${ri}$hgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_${ri}$hgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 continue call stdlib${ii}$_${ri}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, & beta, q, ldq, z, ldz, work, lwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_${ri}$laqz0 #:endif #:endfor recursive module subroutine stdlib${ii}$_claqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & !! CLAQZ0 computes the eigenvalues of a matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a matrix pair (A,B): !! A = Q1*H*Z1**H, B = Q1*T*Z1**H, !! as computed by CGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**H, T = Q*P*Z**H, !! where Q and Z are unitary matrices, P and S are an upper triangular !! matrices. !! Optionally, the unitary matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! unitary matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the unitary matrices from CGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the unitary factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), & alpha( * ), beta( * ), work( * ) real(sp), intent( out ) :: rwork( * ) ! ================================================================ ! local scalars real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr complex(sp) :: eshift, s1, temp integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost logical(lk) :: ilschur, ilq, ilz character(len=3) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=sp) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=sp)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_chgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& ldq, z, ldz, work, lwork, rwork,info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_claqz2 nw = max( nwr, nmin ) call stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_claqz3 call stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=sp) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, q,ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, z,ldz ) ! get machine constants safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp)/ulp ) istart = ilo istop = ihi maxit = 30_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = czero istop = istop-1 ld = 0_${ik}$ eshift = czero end if ! check deflations at the start if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = czero istart = istart+1 ld = 0_${ik}$ eshift = czero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = czero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) tempr = zero if( k < istop ) then tempr = tempr+abs( b( k, k+1 ) ) end if if( k > istart2 ) then tempr = tempr+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_clartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = czero call stdlib${ii}$_crot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_crot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_crot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_clartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = czero call stdlib${ii}$_crot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_crot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_crot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) ) end if end if end do if( istart2<istop )then call stdlib${ii}$_clartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = czero call stdlib${ii}$_crot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_crot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_crot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(& s1 ) ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = czero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using stdlib${ii}$_chgeqz because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( & 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info ) if ( n_deflated > 0_${ik}$ ) then istop = istop-n_deflated ld = 0_${ik}$ eshift = czero end if if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_deflated-n_undeflated+1 if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+cone/( safmin*real( maxit,KIND=sp) ) end if alpha( shiftpos ) = cone beta( shiftpos ) = eshift ns = 1_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( & shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( & nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info ) end do ! call stdlib${ii}$_chgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_chgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 call stdlib${ii}$_chgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, & ldq, z, ldz, work, lwork, rwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_claqz0 recursive module subroutine stdlib${ii}$_zlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & !! ZLAQZ0 computes the eigenvalues of a real matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a real matrix pair (A,B): !! A = Q1*H*Z1**H, B = Q1*T*Z1**H, !! as computed by ZGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**H, T = Q*P*Z**H, !! where Q and Z are unitary matrices, P and S are an upper triangular !! matrices. !! Optionally, the unitary matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! unitary matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the unitary factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & alpha( * ), beta( * ), work( * ) real(dp), intent( out ) :: rwork( * ) ! ================================================================ ! local scalars real(dp) :: smlnum, ulp, safmin, safmax, c1, tempr complex(dp) :: eshift, s1, temp integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost logical(lk) :: ilschur, ilq, ilz character(len=3) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=dp) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=dp)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_zhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& ldq, z, ldz, work, lwork, rwork,info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_zlaqz2 nw = max( nwr, nmin ) call stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_zlaqz3 call stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=dp) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, q,ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, z,ldz ) ! get machine constants safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp)/ulp ) istart = ilo istop = ihi maxit = 30_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = czero istop = istop-1 ld = 0_${ik}$ eshift = czero end if ! check deflations at the start if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = czero istart = istart+1 ld = 0_${ik}$ eshift = czero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = czero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) tempr = zero if( k < istop ) then tempr = tempr+abs( b( k, k+1 ) ) end if if( k > istart2 ) then tempr = tempr+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_zlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = czero call stdlib${ii}$_zrot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_zrot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_zrot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_zlartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = czero call stdlib${ii}$_zrot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_zrot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_zrot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) ) end if end if end do if( istart2<istop )then call stdlib${ii}$_zlartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = czero call stdlib${ii}$_zrot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_zrot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_zrot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(& s1 ) ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = czero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using qz_small because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( & 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info ) if ( n_deflated > 0_${ik}$ ) then istop = istop-n_deflated ld = 0_${ik}$ eshift = czero end if if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_deflated-n_undeflated+1 if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+cone/( safmin*real( maxit,KIND=dp) ) end if alpha( shiftpos ) = cone beta( shiftpos ) = eshift ns = 1_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( & shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( & nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info ) end do ! call stdlib${ii}$_zhgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_zhgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 call stdlib${ii}$_zhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, & ldq, z, ldz, work, lwork, rwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_zlaqz0 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] recursive module subroutine stdlib${ii}$_${ci}$laqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, & !! ZLAQZ0: computes the eigenvalues of a real matrix pair (H,T), !! where H is an upper Hessenberg matrix and T is upper triangular, !! using the double-shift QZ method. !! Matrix pairs of this type are produced by the reduction to !! generalized upper Hessenberg form of a real matrix pair (A,B): !! A = Q1*H*Z1**H, B = Q1*T*Z1**H, !! as computed by ZGGHRD. !! If JOB='S', then the Hessenberg-triangular pair (H,T) is !! also reduced to generalized Schur form, !! H = Q*S*Z**H, T = Q*P*Z**H, !! where Q and Z are unitary matrices, P and S are an upper triangular !! matrices. !! Optionally, the unitary matrix Q from the generalized Schur !! factorization may be postmultiplied into an input matrix Q1, and the !! unitary matrix Z may be postmultiplied into an input matrix Z1. !! If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced !! the matrix pair (A,B) to generalized upper Hessenberg form, then the !! output matrices Q1*Q and Z1*Z are the unitary factors from the !! generalized Schur factorization of (A,B): !! A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is !! complex and beta real. !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the !! generalized nonsymmetric eigenvalue problem (GNEP) !! A*x = lambda*B*x !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the !! alternate form of the GNEP !! mu*A*y = B*y. !! Eigenvalues can be read directly from the generalized Schur !! form: !! alpha = S(i,i), beta = P(i,i). !! Ref: C.B. Moler !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), !! pp. 241--256. !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ !! Algorithm with Aggressive Early Deflation", SIAM J. Numer. !! Anal., 29(2006), pp. 199--227. !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift, !! multipole rational QZ method with agressive early deflation" beta, q, ldq, z,ldz, work, lwork, rwork, rec,info ) use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments character, intent( in ) :: wants, wantq, wantz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec integer(${ik}$), intent( out ) :: info complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & alpha( * ), beta( * ), work( * ) real(${ck}$), intent( out ) :: rwork( * ) ! ================================================================ ! local scalars real(${ck}$) :: smlnum, ulp, safmin, safmax, c1, tempr complex(${ck}$) :: eshift, s1, temp integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,& nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, & istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, & rcost logical(lk) :: ilschur, ilq, ilz character(len=3) :: jbcmpz if( stdlib_lsame( wants, 'E' ) ) then ilschur = .false. iwants = 1_${ik}$ else if( stdlib_lsame( wants, 'S' ) ) then ilschur = .true. iwants = 2_${ik}$ else iwants = 0_${ik}$ end if if( stdlib_lsame( wantq, 'N' ) ) then ilq = .false. iwantq = 1_${ik}$ else if( stdlib_lsame( wantq, 'V' ) ) then ilq = .true. iwantq = 2_${ik}$ else if( stdlib_lsame( wantq, 'I' ) ) then ilq = .true. iwantq = 3_${ik}$ else iwantq = 0_${ik}$ end if if( stdlib_lsame( wantz, 'N' ) ) then ilz = .false. iwantz = 1_${ik}$ else if( stdlib_lsame( wantz, 'V' ) ) then ilz = .true. iwantz = 2_${ik}$ else if( stdlib_lsame( wantz, 'I' ) ) then ilz = .true. iwantz = 3_${ik}$ else iwantz = 0_${ik}$ end if ! check argument values info = 0_${ik}$ if( iwants==0_${ik}$ ) then info = -1_${ik}$ else if( iwantq==0_${ik}$ ) then info = -2_${ik}$ else if( iwantz==0_${ik}$ ) then info = -3_${ik}$ else if( n<0_${ik}$ ) then info = -4_${ik}$ else if( ilo<1_${ik}$ ) then info = -5_${ik}$ else if( ihi>n .or. ihi<ilo-1 ) then info = -6_${ik}$ else if( lda<n ) then info = -8_${ik}$ else if( ldb<n ) then info = -10_${ik}$ else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then info = -15_${ik}$ else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then info = -17_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ0', -info ) return end if ! quick return if possible if( n<=0_${ik}$ ) then work( 1_${ik}$ ) = real( 1_${ik}$,KIND=${ck}$) return end if ! get the parameters jbcmpz( 1_${ik}$:1_${ik}$ ) = wants jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nwr = max( 2_${ik}$, nwr ) nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr ) nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo ) nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) ) rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork ) itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=${ck}$)/100_${ik}$*n ) ),KIND=${ik}$) itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4 nbr = nsr+itemp1 if( n < nmin .or. rec >= 2_${ik}$ ) then call stdlib${ii}$_${ci}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,& ldq, z, ldz, work, lwork, rwork,info ) return end if ! find out required workspace ! workspace query to stdlib${ii}$_${ci}$laqz2 nw = max( nwr, nmin ) call stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, & n_undeflated, n_qeflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,& aed_info ) itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$) ! workspace query to stdlib${ii}$_${ci}$laqz3 call stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, & ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info ) itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$) lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ ) if ( lwork ==-1_${ik}$ ) then work( 1_${ik}$ ) = real( lworkreq,KIND=${ck}$) return else if ( lwork < lworkreq ) then info = -19_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ0', info ) return end if ! initialize q and z if( iwantq==3_${ik}$ ) call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, q,ldq ) if( iwantz==3_${ik}$ ) call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, z,ldz ) ! get machine constants safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${ck}$)/ulp ) istart = ilo istop = ihi maxit = 30_${ik}$*( ihi-ilo+1 ) ld = 0_${ik}$ do iiter = 1, maxit if( iiter >= maxit ) then info = istop+1 goto 80 end if if ( istart+1 >= istop ) then istop = istart exit end if ! check deflations at the end if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( & a( istop-1,istop-1 ) ) ) ) ) then a( istop, istop-1 ) = czero istop = istop-1 ld = 0_${ik}$ eshift = czero end if ! check deflations at the start if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+& abs( a( istart+1,istart+1 ) ) ) ) ) then a( istart+1, istart ) = czero istart = istart+1 ld = 0_${ik}$ eshift = czero end if if ( istart+1 >= istop ) then exit end if ! check interior deflations istart2 = istart do k = istop, istart+1, -1 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) & ) ) ) ) then a( k, k-1 ) = czero istart2 = k exit end if end do ! get range to apply rotations to if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = istart2 istopm = istop end if ! check infinite eigenvalues, this is done without blocking so might ! slow down the method when many infinite eigenvalues are present k = istop do while ( k>=istart2 ) tempr = zero if( k < istop ) then tempr = tempr+abs( b( k, k+1 ) ) end if if( k > istart2 ) then tempr = tempr+abs( b( k-1, k ) ) end if if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then ! a diagonal element of b is negligable, move it ! to the top and deflate it do k2 = k, istart2+1, -1 call stdlib${ii}$_${ci}$lartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp ) b( k2-1, k2 ) = temp b( k2-1, k2-1 ) = czero call stdlib${ii}$_${ci}$rot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ci}$rot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( & istartm, k2-1 ), 1_${ik}$, c1, s1 ) if ( ilz ) then call stdlib${ii}$_${ci}$rot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 ) end if if( k2<istop ) then call stdlib${ii}$_${ci}$lartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp ) a( k2, k2-1 ) = temp a( k2+1, k2-1 ) = czero call stdlib${ii}$_${ci}$rot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, & s1 ) call stdlib${ii}$_${ci}$rot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, & s1 ) if( ilq ) then call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) ) end if end if end do if( istart2<istop )then call stdlib${ii}$_${ci}$lartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, & temp ) a( istart2, istart2 ) = temp a( istart2+1, istart2 ) = czero call stdlib${ii}$_${ci}$rot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( & istart2+1,istart2+1 ), lda, c1, s1 ) call stdlib${ii}$_${ci}$rot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( & istart2+1,istart2+1 ), ldb, c1, s1 ) if( ilq ) then call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(& s1 ) ) end if end if istart2 = istart2+1 end if k = k-1 end do ! istart2 now points to the top of the bottom right ! unreduced hessenberg block if ( istart2 >= istop ) then istop = istart2-1 ld = 0_${ik}$ eshift = czero cycle end if nw = nwr nshifts = nsr nblock = nbr if ( istop-istart2+1 < nmin ) then ! setting nw to the size of the subblock will make aed deflate ! all the eigenvalues. this is slightly more efficient than just ! using qz_small because the off diagonal part gets updated via blas. if ( istop-istart+1 < nmin ) then nw = istop-istart+1 istart2 = istart else nw = istop-istart2+1 end if end if ! time for aed call stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,& z, ldz, n_undeflated, n_qeflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( & 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info ) if ( n_qeflated > 0_${ik}$ ) then istop = istop-n_qeflated ld = 0_${ik}$ eshift = czero end if if ( 100_${ik}$*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin & ) then ! aed has uncovered many eigenvalues. skip a qz sweep and run ! aed again. cycle end if ld = ld+1 ns = min( nshifts, istop-istart2 ) ns = min( ns, n_undeflated ) shiftpos = istop-n_qeflated-n_undeflated+1 if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then ! exceptional shift. chosen for no particularly good reason. if( ( real( maxit,KIND=${ck}$)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, & istop-1 ) ) ) then eshift = a( istop, istop-1 )/b( istop-1, istop-1 ) else eshift = eshift+cone/( safmin*real( maxit,KIND=${ck}$) ) end if alpha( shiftpos ) = cone beta( shiftpos ) = eshift ns = 1_${ik}$ end if ! time for a qz sweep call stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( & shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( & nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info ) end do ! call stdlib${ii}$_${ci}$hgeqz to normalize the eigenvalue blocks and set the eigenvalues ! if all the eigenvalues have been found, stdlib${ii}$_${ci}$hgeqz will not do any iterations ! and only normalize the blocks. in case of a rare convergence failure, ! the single shift might perform better. 80 call stdlib${ii}$_${ci}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, & ldq, z, ldz, work, lwork, rwork,norm_info ) info = norm_info end subroutine stdlib${ii}$_${ci}$laqz0 #:endif #:endfor pure module subroutine stdlib${ii}$_slaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) !! Given a 3-by-3 matrix pencil (A,B), SLAQZ1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). !! It is assumed that either !! 1) sr1 = sr2 !! or !! 2) si = 0. !! This is useful for starting double implicit shift bulges !! in the QZ algorithm. use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments integer(${ik}$), intent( in ) :: lda, ldb real(sp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1, sr2, si,beta1, beta2 real(sp), intent( out ) :: v( * ) ! ================================================================ ! local scalars real(sp) :: w(2_${ik}$), safmin, safmax, scale1, scale2 safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin ! calculate first shifted vector w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ ) w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ ) scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale1 >= safmin .and. scale1 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1 end if ! solve linear system w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ ) w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ ) scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale2 >= safmin .and. scale2 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2 end if ! apply second shift v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) ! account for imaginary part v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2 ! check for overflow if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. & stdlib${ii}$_sisnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_sisnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_sisnan( v( 3_${ik}$ ) ) ) & then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero end if end subroutine stdlib${ii}$_slaqz1 pure module subroutine stdlib${ii}$_dlaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) !! Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). !! It is assumed that either !! 1) sr1 = sr2 !! or !! 2) si = 0. !! This is useful for starting double implicit shift bulges !! in the QZ algorithm. use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments integer(${ik}$), intent( in ) :: lda, ldb real(dp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2 real(dp), intent( out ) :: v( * ) ! ================================================================ ! local scalars real(dp) :: w(2_${ik}$), safmin, safmax, scale1, scale2 safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin ! calculate first shifted vector w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ ) w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ ) scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale1 >= safmin .and. scale1 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1 end if ! solve linear system w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ ) w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ ) scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale2 >= safmin .and. scale2 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2 end if ! apply second shift v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) ! account for imaginary part v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2 ! check for overflow if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. & stdlib${ii}$_disnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_disnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_disnan( v( 3_${ik}$ ) ) ) & then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero end if end subroutine stdlib${ii}$_dlaqz1 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$laqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v ) !! Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a !! scalar multiple of the first column of the product !! (*) K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1). !! It is assumed that either !! 1) sr1 = sr2 !! or !! 2) si = 0. !! This is useful for starting double implicit shift bulges !! in the QZ algorithm. use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments integer(${ik}$), intent( in ) :: lda, ldb real(${rk}$), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2 real(${rk}$), intent( out ) :: v( * ) ! ================================================================ ! local scalars real(${rk}$) :: w(2_${ik}$), safmin, safmax, scale1, scale2 safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin ! calculate first shifted vector w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ ) w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ ) scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale1 >= safmin .and. scale1 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1 end if ! solve linear system w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ ) w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ ) scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) ) if( scale2 >= safmin .and. scale2 <= safmax ) then w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2 w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2 end if ! apply second shift v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(& 2_${ik}$ ) ) ! account for imaginary part v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2 ! check for overflow if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. & stdlib${ii}$_${ri}$isnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_${ri}$isnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_${ri}$isnan( v( 3_${ik}$ ) ) ) & then v( 1_${ik}$ ) = zero v( 2_${ik}$ ) = zero v( 3_${ik}$ ) = zero end if end subroutine stdlib${ii}$_${ri}$laqz1 #:endif #:endfor pure module subroutine stdlib${ii}$_claqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! CLAQZ1 chases a 1x1 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(sp) :: c complex(sp) :: s, temp if( k+1 == ihi ) then ! shift is located on the edge of the matrix, remove it call stdlib${ii}$_clartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = czero call stdlib${ii}$_crot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s ) call stdlib${ii}$_crot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_crot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s ) end if else ! normal operation, move bulge down ! apply transformation from the right call stdlib${ii}$_clartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) b( k+1, k+1 ) = temp b( k+1, k ) = czero call stdlib${ii}$_crot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s ) call stdlib${ii}$_crot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_crot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s ) end if ! apply transformation from the left call stdlib${ii}$_clartg( a( k+1, k ), a( k+2, k ), c, s, temp ) a( k+1, k ) = temp a( k+2, k ) = czero call stdlib${ii}$_crot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) call stdlib${ii}$_crot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) if ( ilq ) then call stdlib${ii}$_crot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(& s ) ) end if end if end subroutine stdlib${ii}$_claqz1 pure module subroutine stdlib${ii}$_zlaqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! ZLAQZ1 chases a 1x1 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(dp) :: c complex(dp) :: s, temp if( k+1 == ihi ) then ! shift is located on the edge of the matrix, remove it call stdlib${ii}$_zlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = czero call stdlib${ii}$_zrot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s ) call stdlib${ii}$_zrot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_zrot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s ) end if else ! normal operation, move bulge down ! apply transformation from the right call stdlib${ii}$_zlartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) b( k+1, k+1 ) = temp b( k+1, k ) = czero call stdlib${ii}$_zrot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s ) call stdlib${ii}$_zrot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_zrot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s ) end if ! apply transformation from the left call stdlib${ii}$_zlartg( a( k+1, k ), a( k+2, k ), c, s, temp ) a( k+1, k ) = temp a( k+2, k ) = czero call stdlib${ii}$_zrot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) call stdlib${ii}$_zrot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) if ( ilq ) then call stdlib${ii}$_zrot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(& s ) ) end if end if end subroutine stdlib${ii}$_zlaqz1 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! ZLAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(${ck}$) :: c complex(${ck}$) :: s, temp if( k+1 == ihi ) then ! shift is located on the edge of the matrix, remove it call stdlib${ii}$_${ci}$lartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = czero call stdlib${ii}$_${ci}$rot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s ) call stdlib${ii}$_${ci}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_${ci}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s ) end if else ! normal operation, move bulge down ! apply transformation from the right call stdlib${ii}$_${ci}$lartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp ) b( k+1, k+1 ) = temp b( k+1, k ) = czero call stdlib${ii}$_${ci}$rot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s ) call stdlib${ii}$_${ci}$rot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s ) if ( ilz ) then call stdlib${ii}$_${ci}$rot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s ) end if ! apply transformation from the left call stdlib${ii}$_${ci}$lartg( a( k+1, k ), a( k+2, k ), c, s, temp ) a( k+1, k ) = temp a( k+2, k ) = czero call stdlib${ii}$_${ci}$rot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s ) call stdlib${ii}$_${ci}$rot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s ) if ( ilq ) then call stdlib${ii}$_${ci}$rot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(& s ) ) end if end if end subroutine stdlib${ii}$_${ci}$laqz1 #:endif #:endfor pure module subroutine stdlib${ii}$_slaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! SLAQZ2 chases a 2x2 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(sp) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp if( k+2 == ihi ) then ! shift is located on the edge of the matrix, remove it h = b( ihi-1:ihi, ihi-2:ihi ) ! make h upper triangular call stdlib${ii}$_slartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_srot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) call stdlib${ii}$_slartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_srot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_slartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_srot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_srot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) b( ihi-1, ihi-2 ) = zero b( ihi, ihi-2 ) = zero call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) if ( ilz ) then call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, & s2 ) end if call stdlib${ii}$_slartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) a( ihi-1, ihi-2 ) = temp a( ihi, ihi-2 ) = zero call stdlib${ii}$_srot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & ) call stdlib${ii}$_srot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & ) if ( ilq ) then call stdlib${ii}$_srot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 & ) end if call stdlib${ii}$_slartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = zero call stdlib${ii}$_srot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) if ( ilz ) then call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) end if else ! normal operation, move bulge down h = b( k+1:k+2, k:k+2 ) ! make h upper triangular call stdlib${ii}$_slartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_srot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) ! calculate z1 and z2 call stdlib${ii}$_slartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_srot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_slartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) ! apply transformations from the right call stdlib${ii}$_srot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 ) call stdlib${ii}$_srot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 ) if ( ilz ) then call stdlib${ii}$_srot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 ) end if b( k+1, k ) = zero b( k+2, k ) = zero ! calculate q1 and q2 call stdlib${ii}$_slartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) a( k+2, k ) = temp a( k+3, k ) = zero call stdlib${ii}$_slartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) a( k+1, k ) = temp a( k+2, k ) = zero ! apply transformations from the left call stdlib${ii}$_srot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) call stdlib${ii}$_srot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) call stdlib${ii}$_srot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) call stdlib${ii}$_srot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) if ( ilq ) then call stdlib${ii}$_srot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 ) end if end if end subroutine stdlib${ii}$_slaqz2 pure module subroutine stdlib${ii}$_dlaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! DLAQZ2 chases a 2x2 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(dp) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp if( k+2 == ihi ) then ! shift is located on the edge of the matrix, remove it h = b( ihi-1:ihi, ihi-2:ihi ) ! make h upper triangular call stdlib${ii}$_dlartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_drot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) call stdlib${ii}$_dlartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_drot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_dlartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_drot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_drot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) b( ihi-1, ihi-2 ) = zero b( ihi, ihi-2 ) = zero call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) if ( ilz ) then call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, & s2 ) end if call stdlib${ii}$_dlartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) a( ihi-1, ihi-2 ) = temp a( ihi, ihi-2 ) = zero call stdlib${ii}$_drot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & ) call stdlib${ii}$_drot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & ) if ( ilq ) then call stdlib${ii}$_drot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 & ) end if call stdlib${ii}$_dlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = zero call stdlib${ii}$_drot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) if ( ilz ) then call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) end if else ! normal operation, move bulge down h = b( k+1:k+2, k:k+2 ) ! make h upper triangular call stdlib${ii}$_dlartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_drot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) ! calculate z1 and z2 call stdlib${ii}$_dlartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_drot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_dlartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) ! apply transformations from the right call stdlib${ii}$_drot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 ) call stdlib${ii}$_drot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 ) if ( ilz ) then call stdlib${ii}$_drot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 ) end if b( k+1, k ) = zero b( k+2, k ) = zero ! calculate q1 and q2 call stdlib${ii}$_dlartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) a( k+2, k ) = temp a( k+3, k ) = zero call stdlib${ii}$_dlartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) a( k+1, k ) = temp a( k+2, k ) = zero ! apply transformations from the left call stdlib${ii}$_drot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) call stdlib${ii}$_drot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) call stdlib${ii}$_drot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) call stdlib${ii}$_drot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) if ( ilq ) then call stdlib${ii}$_drot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 ) end if end if end subroutine stdlib${ii}$_dlaqz2 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$laqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, & !! DLAQZ2: chases a 2x2 shift bulge in a matrix pencil down a single position q, ldq, nz, zstart, z, ldz ) use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilq, ilz integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, & zstart, ihi real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*) ! ================================================================ ! local variables real(${rk}$) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp if( k+2 == ihi ) then ! shift is located on the edge of the matrix, remove it h = b( ihi-1:ihi, ihi-2:ihi ) ! make h upper triangular call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_${ri}$rot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$lartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_${ri}$rot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_${ri}$rot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_${ri}$rot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) b( ihi-1, ihi-2 ) = zero b( ihi, ihi-2 ) = zero call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, & s2 ) if ( ilz ) then call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, & s2 ) end if call stdlib${ii}$_${ri}$lartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp ) a( ihi-1, ihi-2 ) = temp a( ihi, ihi-2 ) = zero call stdlib${ii}$_${ri}$rot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 & ) call stdlib${ii}$_${ri}$rot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 & ) if ( ilq ) then call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 & ) end if call stdlib${ii}$_${ri}$lartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp ) b( ihi, ihi ) = temp b( ihi, ihi-1 ) = zero call stdlib${ii}$_${ri}$rot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, & s1 ) if ( ilz ) then call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 & ) end if else ! normal operation, move bulge down h = b( k+1:k+2, k:k+2 ) ! make h upper triangular call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp ) h( 2_${ik}$, 1_${ik}$ ) = zero h( 1_${ik}$, 1_${ik}$ ) = temp call stdlib${ii}$_${ri}$rot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 ) ! calculate z1 and z2 call stdlib${ii}$_${ri}$lartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp ) call stdlib${ii}$_${ri}$rot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp ) ! apply transformations from the right call stdlib${ii}$_${ri}$rot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 ) call stdlib${ii}$_${ri}$rot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 ) if ( ilz ) then call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 ) end if b( k+1, k ) = zero b( k+2, k ) = zero ! calculate q1 and q2 call stdlib${ii}$_${ri}$lartg( a( k+2, k ), a( k+3, k ), c1, s1, temp ) a( k+2, k ) = temp a( k+3, k ) = zero call stdlib${ii}$_${ri}$lartg( a( k+1, k ), a( k+2, k ), c2, s2, temp ) a( k+1, k ) = temp a( k+2, k ) = zero ! apply transformations from the left call stdlib${ii}$_${ri}$rot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 ) call stdlib${ii}$_${ri}$rot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 ) call stdlib${ii}$_${ri}$rot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 ) call stdlib${ii}$_${ri}$rot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 ) if ( ilq ) then call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 ) end if end if end subroutine stdlib${ii}$_${ri}$laqz2 #:endif #:endfor recursive module subroutine stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! CLAQZ2 performs AED ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), & alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info complex(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) complex(sp), intent(out) :: work(*) real(sp), intent(out) :: rwork(*) ! ================================================================ ! local scalars integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ctgexc_info, ifst, ilst, & lworkreq, qz_small_info real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr complex(sp) :: s, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = czero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_claqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CLAQZ2', -info ) return end if ! get machine constants safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alpha( kwtop ) = a( kwtop, kwtop ) beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = czero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_clacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_clacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_claset( 'FULL', jw, jw, czero, cone, qc, ldqc ) call stdlib${ii}$_claset( 'FULL', jw, jw, czero, cone, zc, ldzc ) call stdlib${ii}$_claqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_clacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_clacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == czero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) ! try to deflate eigenvalue tempr = abs( a( kwbot, kwbot ) ) if( tempr == zero ) then tempr = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_ctgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ctgexc_info ) k2 = k2+1 end if k = k+1 end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) alpha( k ) = a( k, k ) beta( k ) = b( k, k ) k = k+1 end do if ( kwtop /= ilo .and. s /= czero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_clartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = czero k2 = max( kwtop, k-1 ) call stdlib${ii}$_crot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_crot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_crot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( & s1 ) ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) ! move bulge down and remove it do k2 = k, kwbot-1 call stdlib${ii}$_claqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) end do k = k-1 end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & lda, czero, work, jw ) call stdlib${ii}$_clacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & ldb, czero, work, jw ) call stdlib${ii}$_clacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_cgemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, & work, n ) call stdlib${ii}$_clacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& , lda ) call stdlib${ii}$_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& , ldb ) end if if ( ilz ) then call stdlib${ii}$_cgemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, & work, n ) call stdlib${ii}$_clacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_claqz2 recursive module subroutine stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! ZLAQZ2 performs AED ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info complex(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) complex(dp), intent(out) :: work(*) real(dp), intent(out) :: rwork(*) ! ================================================================ ! local scalars integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, & lworkreq, qz_small_info real(dp) ::smlnum, ulp, safmin, safmax, c1, tempr complex(dp) :: s, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = czero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_zlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ2', -info ) return end if ! get machine constants safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alpha( kwtop ) = a( kwtop, kwtop ) beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = czero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_zlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_zlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_zlaset( 'FULL', jw, jw, czero, cone, qc, ldqc ) call stdlib${ii}$_zlaset( 'FULL', jw, jw, czero, cone, zc, ldzc ) call stdlib${ii}$_zlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_zlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_zlacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == czero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) ! try to deflate eigenvalue tempr = abs( a( kwbot, kwbot ) ) if( tempr == zero ) then tempr = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_ztgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info ) k2 = k2+1 end if k = k+1 end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) alpha( k ) = a( k, k ) beta( k ) = b( k, k ) k = k+1 end do if ( kwtop /= ilo .and. s /= czero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_zlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = czero k2 = max( kwtop, k-1 ) call stdlib${ii}$_zrot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_zrot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_zrot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( & s1 ) ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) ! move bulge down and remove it do k2 = k, kwbot-1 call stdlib${ii}$_zlaqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) end do k = k-1 end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & lda, czero, work, jw ) call stdlib${ii}$_zlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & ldb, czero, work, jw ) call stdlib${ii}$_zlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_zgemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, & work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& , lda ) call stdlib${ii}$_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& , ldb ) end if if ( ilz ) then call stdlib${ii}$_zgemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, & work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_zlaqz2 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] recursive module subroutine stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! ZLAQZ2: performs AED ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info ) use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), & alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info complex(${ck}$), intent(inout) :: qc(ldqc,*), zc(ldzc,*) complex(${ck}$), intent(out) :: work(*) real(${ck}$), intent(out) :: rwork(*) ! ================================================================ ! local scalars integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, & lworkreq, qz_small_info real(${ck}$) ::smlnum, ulp, safmin, safmax, c1, tempr complex(${ck}$) :: s, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = czero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_${ci}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ2', -info ) return end if ! get machine constants safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${ck}$)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alpha( kwtop ) = a( kwtop, kwtop ) beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = czero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_${ci}$laset( 'FULL', jw, jw, czero, cone, qc, ldqc ) call stdlib${ii}$_${ci}$laset( 'FULL', jw, jw, czero, cone, zc, ldzc ) call stdlib${ii}$_${ci}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == czero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) ! try to deflate eigenvalue tempr = abs( a( kwbot, kwbot ) ) if( tempr == zero ) then tempr = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_${ci}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info ) k2 = k2+1 end if k = k+1 end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) alpha( k ) = a( k, k ) beta( k ) = b( k, k ) k = k+1 end do if ( kwtop /= ilo .and. s /= czero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_${ci}$lartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = czero k2 = max( kwtop, k-1 ) call stdlib${ii}$_${ci}$rot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_${ci}$rot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_${ci}$rot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( & s1 ) ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) ! move bulge down and remove it do k2 = k, kwbot-1 call stdlib${ii}$_${ci}$laqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, & ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc ) end do k = k-1 end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), & lda, czero, work, jw ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), & ldb, czero, work, jw ) call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, & work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_${ci}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )& , lda ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, & zc, ldzc, czero, work,kwtop-istartm ) call stdlib${ii}$_${ci}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )& , ldb ) end if if ( ilz ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, & work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_${ci}$laqz2 #:endif #:endfor recursive module subroutine stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! SLAQZ3 performs AED ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(& * ), alphai( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info real(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) real(sp), intent(out) :: work(*) ! ================================================================ ! local scalars logical(lk) :: bulge integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, stgexc_info, ifst, ilst, & lworkreq, qz_small_info real(sp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = zero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_stgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & work, -1_${ik}$, stgexc_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_slaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info ) lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ ) lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLAQZ3', -info ) return end if ! get machine constants safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_slabad( safmin, safmax ) ulp = stdlib${ii}$_slamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=sp)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alphar( kwtop ) = a( kwtop, kwtop ) alphai( kwtop ) = zero beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = zero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_slacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_slacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_slaset( 'FULL', jw, jw, zero, one, qc, ldqc ) call stdlib${ii}$_slaset( 'FULL', jw, jw, zero, one, zc, ldzc ) call stdlib${ii}$_slaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_slacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_slacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == zero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) bulge = .false. if ( kwbot-kwtop+1 >= 2_${ik}$ ) then bulge = a( kwbot, kwbot-1 ) /= zero end if if ( bulge ) then ! try to deflate complex conjugate eigenvalue pair temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & a( kwbot-1, kwbot ) ) ) if( temp == zero )then temp = abs( s ) end if if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= & max( smlnum,ulp*temp ) ) then ! deflatable kwbot = kwbot-2 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info ) k2 = k2+2 end if k = k+2 else ! try to deflate real eigenvalue temp = abs( a( kwbot, kwbot ) ) if( temp == zero ) then temp = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info ) k2 = k2+1 end if k = k+1 end if end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) bulge = .false. if ( k < ihi ) then if ( a( k+1, k ) /= zero ) then bulge = .true. end if end if if ( bulge ) then ! 2x2 eigenvalue block call stdlib${ii}$_slag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& alphar( k ),alphar( k+1 ), alphai( k ) ) alphai( k+1 ) = -alphai( k ) k = k+2 else ! 1x1 eigenvalue block alphar( k ) = a( k, k ) alphai( k ) = zero beta( k ) = b( k, k ) k = k+1 end if end do if ( kwtop /= ilo .and. s /= zero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_slartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = zero k2 = max( kwtop, k-1 ) call stdlib${ii}$_srot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_srot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_srot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then ! move double pole block down and remove it do k2 = k-1, kwbot-2 call stdlib${ii}$_slaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) end do k = k-2 else ! k points to single shift do k2 = k, kwbot-2 ! move shift down call stdlib${ii}$_slartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) b( k2+1, k2+1 ) = temp b( k2+1, k2 ) = zero call stdlib${ii}$_srot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, & c1, s1 ) call stdlib${ii}$_srot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_slartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) a( k2+1, k2 ) = temp a( k2+2, k2 ) = zero call stdlib${ii}$_srot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& s1 ) call stdlib${ii}$_srot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& s1 ) call stdlib${ii}$_srot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, & c1, s1 ) end do ! remove the shift call stdlib${ii}$_slartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) b( kwbot, kwbot ) = temp b( kwbot, kwbot-1 ) = zero call stdlib${ii}$_srot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),& 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 & ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, & c1, s1 ) k = k-1 end if end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & lda, zero, work, jw ) call stdlib${ii}$_slacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & ldb, zero, work, jw ) call stdlib${ii}$_slacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_sgemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, & work, n ) call stdlib${ii}$_slacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & ), lda ) call stdlib${ii}$_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & ), ldb ) end if if ( ilz ) then call stdlib${ii}$_sgemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, & work, n ) call stdlib${ii}$_slacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_slaqz3 recursive module subroutine stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! DLAQZ3 performs AED ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& * ),alphai( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info real(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*) real(dp), intent(out) :: work(*) ! ================================================================ ! local scalars logical(lk) :: bulge integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, & lworkreq, qz_small_info real(dp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = zero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_dtgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & work, -1_${ik}$, dtgexc_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_dlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info ) lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ ) lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ3', -info ) return end if ! get machine constants safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_dlabad( safmin, safmax ) ulp = stdlib${ii}$_dlamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=dp)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alphar( kwtop ) = a( kwtop, kwtop ) alphai( kwtop ) = zero beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = zero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_dlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_dlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_dlaset( 'FULL', jw, jw, zero, one, qc, ldqc ) call stdlib${ii}$_dlaset( 'FULL', jw, jw, zero, one, zc, ldzc ) call stdlib${ii}$_dlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_dlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_dlacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == zero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) bulge = .false. if ( kwbot-kwtop+1 >= 2_${ik}$ ) then bulge = a( kwbot, kwbot-1 ) /= zero end if if ( bulge ) then ! try to deflate complex conjugate eigenvalue pair temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & a( kwbot-1, kwbot ) ) ) if( temp == zero )then temp = abs( s ) end if if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= & max( smlnum,ulp*temp ) ) then ! deflatable kwbot = kwbot-2 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) k2 = k2+2 end if k = k+2 else ! try to deflate real eigenvalue temp = abs( a( kwbot, kwbot ) ) if( temp == zero ) then temp = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) k2 = k2+1 end if k = k+1 end if end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) bulge = .false. if ( k < ihi ) then if ( a( k+1, k ) /= zero ) then bulge = .true. end if end if if ( bulge ) then ! 2x2 eigenvalue block call stdlib${ii}$_dlag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& alphar( k ),alphar( k+1 ), alphai( k ) ) alphai( k+1 ) = -alphai( k ) k = k+2 else ! 1x1 eigenvalue block alphar( k ) = a( k, k ) alphai( k ) = zero beta( k ) = b( k, k ) k = k+1 end if end do if ( kwtop /= ilo .and. s /= zero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_dlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = zero k2 = max( kwtop, k-1 ) call stdlib${ii}$_drot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_drot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_drot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then ! move double pole block down and remove it do k2 = k-1, kwbot-2 call stdlib${ii}$_dlaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) end do k = k-2 else ! k points to single shift do k2 = k, kwbot-2 ! move shift down call stdlib${ii}$_dlartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) b( k2+1, k2+1 ) = temp b( k2+1, k2 ) = zero call stdlib${ii}$_drot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, & c1, s1 ) call stdlib${ii}$_drot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_dlartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) a( k2+1, k2 ) = temp a( k2+2, k2 ) = zero call stdlib${ii}$_drot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& s1 ) call stdlib${ii}$_drot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& s1 ) call stdlib${ii}$_drot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, & c1, s1 ) end do ! remove the shift call stdlib${ii}$_dlartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) b( kwbot, kwbot ) = temp b( kwbot, kwbot-1 ) = zero call stdlib${ii}$_drot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),& 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 & ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, & c1, s1 ) k = k-1 end if end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & lda, zero, work, jw ) call stdlib${ii}$_dlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & ldb, zero, work, jw ) call stdlib${ii}$_dlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_dgemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, & work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & ), lda ) call stdlib${ii}$_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & ), ldb ) end if if ( ilz ) then call stdlib${ii}$_dgemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, & work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_dlaqz3 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] recursive module subroutine stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, & !! DLAQZ3: performs AED ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info ) use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, & rec real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(& * ),alphai( * ), beta( * ) integer(${ik}$), intent( out ) :: ns, nd, info real(${rk}$), intent(inout) :: qc(ldqc,*), zc(ldzc,*) real(${rk}$), intent(out) :: work(*) ! ================================================================ ! local scalars logical(lk) :: bulge integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, & lworkreq, qz_small_info real(${rk}$) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp info = 0_${ik}$ ! set up deflation window jw = min( nw, ihi-ilo+1 ) kwtop = ihi-jw+1 if ( kwtop == ilo ) then s = zero else s = a( kwtop, kwtop-1 ) end if ! determine required workspace ifst = 1_${ik}$ ilst = jw call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, & work, -1_${ik}$, dtgexc_info ) lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$) call stdlib${ii}$_${ri}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info ) lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ ) lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n ) if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = lworkreq return else if ( lwork < lworkreq ) then info = -26_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ3', -info ) return end if ! get machine constants safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_${ri}$labad( safmin, safmax ) ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' ) smlnum = safmin*( real( n,KIND=${rk}$)/ulp ) if ( ihi == kwtop ) then ! 1 by 1 deflation window, just try a regular deflation alphar( kwtop ) = a( kwtop, kwtop ) alphai( kwtop ) = zero beta( kwtop ) = b( kwtop, kwtop ) ns = 1_${ik}$ nd = 0_${ik}$ if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then ns = 0_${ik}$ nd = 1_${ik}$ if ( kwtop > ilo ) then a( kwtop, kwtop-1 ) = zero end if end if end if ! store window in case of convergence failure call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw ) ! transform window to real schur form call stdlib${ii}$_${ri}$laset( 'FULL', jw, jw, zero, one, qc, ldqc ) call stdlib${ii}$_${ri}$laset( 'FULL', jw, jw, zero, one, zc, ldzc ) call stdlib${ii}$_${ri}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),& ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, & qz_small_info ) if( qz_small_info /= 0_${ik}$ ) then ! convergence failure, restore the window and exit nd = 0_${ik}$ ns = jw-qz_small_info call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb ) return end if ! deflation detection loop if ( kwtop == ilo .or. s == zero ) then kwbot = kwtop-1 else kwbot = ihi k = 1_${ik}$ k2 = 1_${ik}$ do while ( k <= jw ) bulge = .false. if ( kwbot-kwtop+1 >= 2_${ik}$ ) then bulge = a( kwbot, kwbot-1 ) /= zero end if if ( bulge ) then ! try to deflate complex conjugate eigenvalue pair temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( & a( kwbot-1, kwbot ) ) ) if( temp == zero )then temp = abs( s ) end if if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= & max( smlnum,ulp*temp ) ) then ! deflatable kwbot = kwbot-2 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) k2 = k2+2 end if k = k+2 else ! try to deflate real eigenvalue temp = abs( a( kwbot, kwbot ) ) if( temp == zero ) then temp = abs( s ) end if if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) & then ! deflatable kwbot = kwbot-1 else ! not deflatable, move out of the way ifst = kwbot-kwtop+1 ilst = k2 call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, & kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info ) k2 = k2+1 end if k = k+1 end if end do end if ! store eigenvalues nd = ihi-kwbot ns = jw-nd k = kwtop do while ( k <= ihi ) bulge = .false. if ( k < ihi ) then if ( a( k+1, k ) /= zero ) then bulge = .true. end if end if if ( bulge ) then ! 2x2 eigenvalue block call stdlib${ii}$_${ri}$lag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),& alphar( k ),alphar( k+1 ), alphai( k ) ) alphai( k+1 ) = -alphai( k ) k = k+2 else ! 1x1 eigenvalue block alphar( k ) = a( k, k ) alphai( k ) = zero beta( k ) = b( k, k ) k = k+1 end if end do if ( kwtop /= ilo .and. s /= zero ) then ! reflect spike back, this will create optimally packed bulges a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd ) do k = kwbot-1, kwtop, -1 call stdlib${ii}$_${ri}$lartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp ) a( k, kwtop-1 ) = temp a( k+1, kwtop-1 ) = zero k2 = max( kwtop, k-1 ) call stdlib${ii}$_${ri}$rot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 ) call stdlib${ii}$_${ri}$rot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 ) call stdlib${ii}$_${ri}$rot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 ) end do ! chase bulges down istartm = kwtop istopm = ihi k = kwbot-1 do while ( k >= kwtop ) if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then ! move double pole block down and remove it do k2 = k-1, kwbot-2 call stdlib${ii}$_${ri}$laqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,& ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc ) end do k = k-2 else ! k points to single shift do k2 = k, kwbot-2 ! move shift down call stdlib${ii}$_${ri}$lartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp ) b( k2+1, k2+1 ) = temp b( k2+1, k2 ) = zero call stdlib${ii}$_${ri}$rot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), & 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, & c1, s1 ) call stdlib${ii}$_${ri}$rot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, & s1 ) call stdlib${ii}$_${ri}$lartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp ) a( k2+1, k2 ) = temp a( k2+2, k2 ) = zero call stdlib${ii}$_${ri}$rot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,& s1 ) call stdlib${ii}$_${ri}$rot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,& s1 ) call stdlib${ii}$_${ri}$rot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, & c1, s1 ) end do ! remove the shift call stdlib${ii}$_${ri}$lartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp ) b( kwbot, kwbot ) = temp b( kwbot, kwbot-1 ) = zero call stdlib${ii}$_${ri}$rot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),& 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 & ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, & c1, s1 ) k = k-1 end if end do end if ! apply qc and zc to rest of the matrix if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if if ( istopm-ihi > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), & lda, zero, work, jw ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), & ldb, zero, work, jw ) call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb ) end if if ( ilq ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, & work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq ) end if if ( kwtop-1-istartm+1 > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_${ri}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop & ), lda ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, & zc, ldzc, zero, work,kwtop-istartm ) call stdlib${ii}$_${ri}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop & ), ldb ) end if if ( ilz ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, & work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz ) end if end subroutine stdlib${ii}$_${ri}$laqz3 #:endif #:endfor pure module subroutine stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,& !! CLAQZ3 Executes a single multishift QZ sweep beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_desired, ldqc, ldzc complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( & ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local scalars integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(sp) :: safmin, safmax, c, scale complex(sp) :: temp, temp2, temp3, s info = 0_${ik}$ if ( nblock_desired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_desired return else if ( lwork < n*nblock_desired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CLAQZ3', -info ) return end if ! executable statements ! get machine constants safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_slabad( safmin, safmax ) if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ns = nshifts npos = max( nblock_desired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_claset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) call stdlib${ii}$_claset( 'FULL', ns, ns, czero, cone, zc, ldzc ) do i = 1, ns ! introduce the shift scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) if( scale >= safmin .and. scale <= safmax ) then alpha( i ) = alpha( i )/scale beta( i ) = beta( i )/scale end if temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) temp3 = beta( i )*a( ilo+1, ilo ) if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then temp2 = cone temp3 = czero end if call stdlib${ii}$_clartg( temp2, temp3, c, s, temp ) call stdlib${ii}$_crot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) call stdlib${ii}$_crot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) call stdlib${ii}$_crot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c, conjg( s ) ) ! chase the shift down do j = 1, ns-i call stdlib${ii}$_claqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& ns ), lda, czero, work, sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& ns ), ldb, czero, work, sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_cgemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_clacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_cgemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_clacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_claset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) call stdlib${ii}$_claset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -1 do j = 0, np-1 ! move down the block with index k+i+j, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_claqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& ns+np ), lda, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& ns+np ), ldb, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_cgemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_clacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_cgemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_clacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_claset( 'FULL', ns, ns, czero, cone, qc, ldqc ) call stdlib${ii}$_claset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns ! chase the shift down to the bottom right corner do ishift = ihi-i, ihi-1 call stdlib${ii}$_claqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, czero, work, sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, czero, work, sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_cgemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,& work, n ) call stdlib${ii}$_clacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & lda, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & ldb, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_cgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, & czero, work, n ) call stdlib${ii}$_clacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_claqz3 pure module subroutine stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,& !! ZLAQZ3 Executes a single multishift QZ sweep beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_desired, ldqc, ldzc complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( & ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local scalars integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(dp) :: safmin, safmax, c, scale complex(dp) :: temp, temp2, temp3, s info = 0_${ik}$ if ( nblock_desired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_desired return else if ( lwork < n*nblock_desired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ3', -info ) return end if ! executable statements ! get machine constants safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_dlabad( safmin, safmax ) if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ns = nshifts npos = max( nblock_desired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_zlaset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) call stdlib${ii}$_zlaset( 'FULL', ns, ns, czero, cone, zc, ldzc ) do i = 1, ns ! introduce the shift scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) if( scale >= safmin .and. scale <= safmax ) then alpha( i ) = alpha( i )/scale beta( i ) = beta( i )/scale end if temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) temp3 = beta( i )*a( ilo+1, ilo ) if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then temp2 = cone temp3 = czero end if call stdlib${ii}$_zlartg( temp2, temp3, c, s, temp ) call stdlib${ii}$_zrot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) call stdlib${ii}$_zrot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) call stdlib${ii}$_zrot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c,conjg( s ) ) ! chase the shift down do j = 1, ns-i call stdlib${ii}$_zlaqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& ns ), lda, czero, work, sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& ns ), ldb, czero, work, sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_zgemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_zgemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_zlaset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) call stdlib${ii}$_zlaset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -1 do j = 0, np-1 ! move down the block with index k+i+j, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_zlaqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& ns+np ), lda, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& ns+np ), ldb, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_zgemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_zgemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_zlaset( 'FULL', ns, ns, czero, cone, qc, ldqc ) call stdlib${ii}$_zlaset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns ! chase the shift down to the bottom right corner do ishift = ihi-i, ihi-1 call stdlib${ii}$_zlaqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, czero, work, sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, czero, work, sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_zgemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,& work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & lda, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & ldb, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_zgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, & czero, work, n ) call stdlib${ii}$_zlacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_zlaqz3 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, alpha,& !! ZLAQZ3: Executes a single multishift QZ sweep beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info ) use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_qesired, ldqc, ldzc complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( & ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local scalars integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(${ck}$) :: safmin, safmax, c, scale complex(${ck}$) :: temp, temp2, temp3, s info = 0_${ik}$ if ( nblock_qesired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_qesired return else if ( lwork < n*nblock_qesired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZLAQZ3', -info ) return end if ! executable statements ! get machine constants safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) safmax = one/safmin call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax ) if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ns = nshifts npos = max( nblock_qesired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_${ci}$laset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc ) call stdlib${ii}$_${ci}$laset( 'FULL', ns, ns, czero, cone, zc, ldzc ) do i = 1, ns ! introduce the shift scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) ) if( scale >= safmin .and. scale <= safmax ) then alpha( i ) = alpha( i )/scale beta( i ) = beta( i )/scale end if temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo ) temp3 = beta( i )*a( ilo+1, ilo ) if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then temp2 = cone temp3 = czero end if call stdlib${ii}$_${ci}$lartg( temp2, temp3, c, s, temp ) call stdlib${ii}$_${ci}$rot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s ) call stdlib${ii}$_${ci}$rot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s ) call stdlib${ii}$_${ci}$rot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c,conjg( s ) ) ! chase the shift down do j = 1, ns-i call stdlib${ii}$_${ci}$laqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+& ns ), lda, czero, work, sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+& ns ), ldb, czero, work, sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_${ci}$laset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc ) call stdlib${ii}$_${ci}$laset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -1 do j = 0, np-1 ! move down the block with index k+i+j, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_${ci}$laqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+& ns+np ), lda, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+& ns+np ), ldb, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & czero, work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, & zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, & czero, work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_${ci}$laset( 'FULL', ns, ns, czero, cone, qc, ldqc ) call stdlib${ii}$_${ci}$laset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns ! chase the shift down to the bottom right corner do ishift = ihi-i, ihi-1 call stdlib${ii}$_${ci}$laqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, czero, work, sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, czero, work, sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,& work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), & lda, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), & ldb, zc, ldzc, czero, work,sheight ) call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, & czero, work, n ) call stdlib${ii}$_${ci}$lacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_${ci}$laqz3 #:endif #:endfor pure module subroutine stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, & !! SLAQZ4 Executes a single multishift QZ sweep si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_desired, ldqc, ldzc real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( & ldqc, * ), zc( ldzc, * ), work( * ), sr( * ),si( * ), ss( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local variables integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(sp) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap info = 0_${ik}$ if ( nblock_desired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_desired return else if ( lwork < n*nblock_desired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SLAQZ4', -info ) return end if ! executable statements if ( nshifts < 2_${ik}$ ) then return end if if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ! shuffle shifts into pairs of real shifts and pairs ! of complex conjugate shifts assuming complex ! conjugate shifts are already adjacent to one ! another do i = 1, nshifts-2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap swap = ss( i ) ss( i ) = ss( i+1 ) ss( i+1 ) = ss( i+2 ) ss( i+2 ) = swap end if end do ! nshfts is supposed to be even, but if it is odd, ! then simply reduce it by one. the shuffle above ! ensures that the dropped shift is real and that ! the remaining shifts are paired. ns = nshifts-mod( nshifts, 2_${ik}$ ) npos = max( nblock_desired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_slaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) call stdlib${ii}$_slaset( 'FULL', ns, ns, zero, one, zc, ldzc ) do i = 1, ns, 2 ! introduce the shift call stdlib${ii}$_slaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & i ), ss( i ), ss( i+1 ), v ) temp = v( 2_${ik}$ ) call stdlib${ii}$_slartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) ) call stdlib${ii}$_slartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_srot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) call stdlib${ii}$_srot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) call stdlib${ii}$_srot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) call stdlib${ii}$_srot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) call stdlib${ii}$_srot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_srot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 ) ! chase the shift down do j = 1, ns-1-i call stdlib${ii}$_slaqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & ), lda, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & ), ldb, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_sgemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_slacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_sgemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, & zero, work, n ) call stdlib${ii}$_slacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_slaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) call stdlib${ii}$_slaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -2 do j = 0, np-1 ! move down the block with index k+i+j-1, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_slaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& ns+np ), lda, zero, work,sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& ns+np ), ldb, zero, work,sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_sgemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_slacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_sgemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, & zero, work, n ) call stdlib${ii}$_slacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_slaset( 'FULL', ns, ns, zero, one, qc, ldqc ) call stdlib${ii}$_slaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns, 2 ! chase the shift down to the bottom right corner do ishift = ihi-i-1, ihi-2 call stdlib${ii}$_slaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_sgemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, & work, n ) call stdlib${ii}$_slacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_sgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz, zc,ldzc, zero, & work, n ) call stdlib${ii}$_slacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_slaqz4 pure module subroutine stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, & !! DLAQZ4 Executes a single multishift QZ sweep si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_desired, ldqc, ldzc real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( & ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local scalars integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(dp) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap info = 0_${ik}$ if ( nblock_desired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_desired return else if ( lwork < n*nblock_desired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ4', -info ) return end if ! executable statements if ( nshifts < 2_${ik}$ ) then return end if if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ! shuffle shifts into pairs of real shifts and pairs ! of complex conjugate shifts assuming complex ! conjugate shifts are already adjacent to one ! another do i = 1, nshifts-2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap swap = ss( i ) ss( i ) = ss( i+1 ) ss( i+1 ) = ss( i+2 ) ss( i+2 ) = swap end if end do ! nshfts is supposed to be even, but if it is odd, ! then simply reduce it by one. the shuffle above ! ensures that the dropped shift is real and that ! the remaining shifts are paired. ns = nshifts-mod( nshifts, 2_${ik}$ ) npos = max( nblock_desired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_dlaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) call stdlib${ii}$_dlaset( 'FULL', ns, ns, zero, one, zc, ldzc ) do i = 1, ns, 2 ! introduce the shift call stdlib${ii}$_dlaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & i ), ss( i ), ss( i+1 ), v ) temp = v( 2_${ik}$ ) call stdlib${ii}$_dlartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) ) call stdlib${ii}$_dlartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_drot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) call stdlib${ii}$_drot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) call stdlib${ii}$_drot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) call stdlib${ii}$_drot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) call stdlib${ii}$_drot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_drot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 ) ! chase the shift down do j = 1, ns-1-i call stdlib${ii}$_dlaqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & ), lda, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & ), ldb, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_dgemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_dgemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, & zero, work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_dlaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) call stdlib${ii}$_dlaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -2 do j = 0, np-1 ! move down the block with index k+i+j-1, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_dlaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& ns+np ), lda, zero, work,sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& ns+np ), ldb, zero, work,sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_dgemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_dgemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, & zero, work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_dlaset( 'FULL', ns, ns, zero, one, qc, ldqc ) call stdlib${ii}$_dlaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns, 2 ! chase the shift down to the bottom right corner do ishift = ihi-i-1, ihi-2 call stdlib${ii}$_dlaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_dgemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, & work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_dgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, zero,& work, n ) call stdlib${ii}$_dlacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_dlaqz4 #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, sr, & !! DLAQZ4: Executes a single multishift QZ sweep si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info ) use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! function arguments logical(lk), intent( in ) :: ilschur, ilq, ilz integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, & nblock_qesired, ldqc, ldzc real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( & ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * ) integer(${ik}$), intent( out ) :: info ! ================================================================ ! local scalars integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, & ishift, nblock, npos real(${rk}$) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap info = 0_${ik}$ if ( nblock_qesired < nshifts+1 ) then info = -8_${ik}$ end if if ( lwork ==-1_${ik}$ ) then ! workspace query, quick return work( 1_${ik}$ ) = n*nblock_qesired return else if ( lwork < n*nblock_qesired ) then info = -25_${ik}$ end if if( info/=0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DLAQZ4', -info ) return end if ! executable statements if ( nshifts < 2_${ik}$ ) then return end if if ( ilo >= ihi ) then return end if if ( ilschur ) then istartm = 1_${ik}$ istopm = n else istartm = ilo istopm = ihi end if ! shuffle shifts into pairs of real shifts and pairs ! of complex conjugate shifts assuming complex ! conjugate shifts are already adjacent to one ! another do i = 1, nshifts-2, 2 if( si( i )/=-si( i+1 ) ) then swap = sr( i ) sr( i ) = sr( i+1 ) sr( i+1 ) = sr( i+2 ) sr( i+2 ) = swap swap = si( i ) si( i ) = si( i+1 ) si( i+1 ) = si( i+2 ) si( i+2 ) = swap swap = ss( i ) ss( i ) = ss( i+1 ) ss( i+1 ) = ss( i+2 ) ss( i+2 ) = swap end if end do ! nshfts is supposed to be even, but if it is odd, ! then simply reduce it by one. the shuffle above ! ensures that the dropped shift is real and that ! the remaining shifts are paired. ns = nshifts-mod( nshifts, 2_${ik}$ ) npos = max( nblock_qesired-ns, 1_${ik}$ ) ! the following block introduces the shifts and chases ! them down one by one just enough to make space for ! the other shifts. the near-the-diagonal block is ! of size (ns+1) x ns. call stdlib${ii}$_${ri}$laset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc ) call stdlib${ii}$_${ri}$laset( 'FULL', ns, ns, zero, one, zc, ldzc ) do i = 1, ns, 2 ! introduce the shift call stdlib${ii}$_${ri}$laqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( & i ), ss( i ), ss( i+1 ), v ) temp = v( 2_${ik}$ ) call stdlib${ii}$_${ri}$lartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) ) call stdlib${ii}$_${ri}$lartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp ) call stdlib${ii}$_${ri}$rot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 ) call stdlib${ii}$_${ri}$rot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 ) call stdlib${ii}$_${ri}$rot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 ) call stdlib${ii}$_${ri}$rot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 ) call stdlib${ii}$_${ri}$rot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 ) call stdlib${ii}$_${ri}$rot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 ) ! chase the shift down do j = 1, ns-1-i call stdlib${ii}$_${ri}$laqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( & ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc ) end do end do ! update the rest of the pencil ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm) ! from the left with qc(1:ns+1,1:ns+1)' sheight = ns+1 swidth = istopm-( ilo+ns )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns & ), lda, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns & ), ldb, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb ) end if if ( ilq ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq ) end if ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1) ! from the right with zc(1:ns,1:ns) sheight = ilo-1-istartm+1 swidth = ns if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, & zc, ldzc, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb ) end if if ( ilz ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, & zero, work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz ) end if ! the following block chases the shifts down to the bottom ! right block. if possible, a shift is moved down npos ! positions at a time k = ilo do while ( k < ihi-ns ) np = min( ihi-ns-k, npos ) ! size of the near-the-diagonal block nblock = ns+np ! istartb points to the first row we will be updating istartb = k+1 ! istopb points to the last column we will be updating istopb = k+nblock-1 call stdlib${ii}$_${ri}$laset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc ) call stdlib${ii}$_${ri}$laset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc ) ! near the diagonal shift chase do i = ns-1, 0, -2 do j = 0, np-1 ! move down the block with index k+i+j-1, updating ! the (ns+np x ns+np) block: ! (k:k+ns+np,k:k+ns+np-1) call stdlib${ii}$_${ri}$laqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, & ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc ) end do end do ! update rest of the pencil ! update a(k+1:k+ns+np, k+ns+np:istopm) and ! b(k+1:k+ns+np, k+ns+np:istopm) ! from the left with qc(1:ns+np,1:ns+np)' sheight = ns+np swidth = istopm-( k+ns+np )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+& ns+np ), lda, zero, work,sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda & ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+& ns+np ), ldb, zero, work,sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, & zero, work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq ) end if ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1) ! from the right with zc(1:ns+np,1:ns+np) sheight = k-istartm+1 swidth = nblock if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, & zc, ldzc, zero, work,sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb ) end if if ( ilz ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, & zero, work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz ) end if k = k+np end do ! the following block removes the shifts from the bottom right corner ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi). call stdlib${ii}$_${ri}$laset( 'FULL', ns, ns, zero, one, qc, ldqc ) call stdlib${ii}$_${ri}$laset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc ) ! istartb points to the first row we will be updating istartb = ihi-ns+1 ! istopb points to the last column we will be updating istopb = ihi do i = 1, ns, 2 ! chase the shift down to the bottom right corner do ishift = ihi-i-1, ihi-2 call stdlib${ii}$_${ri}$laqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, & ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc ) end do end do ! update rest of the pencil ! update a(ihi-ns+1:ihi, ihi+1:istopm) ! from the left with qc(1:ns,1:ns)' sheight = ns swidth = istopm-( ihi+1 )+1_${ik}$ if ( swidth > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, & ihi+1 ), lda, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda & ) call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, & ihi+1 ), ldb, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb & ) end if if ( ilq ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, & work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq ) end if ! update a(istartm:ihi-ns,ihi-ns:ihi) ! from the right with zc(1:ns+1,1:ns+1) sheight = ihi-ns-istartm+1 swidth = ns+1 if ( sheight > 0_${ik}$ ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda & ) call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,& zc, ldzc, zero, work, sheight ) call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb & ) end if if ( ilz ) then call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, zero,& work, n ) call stdlib${ii}$_${ri}$lacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz ) end if end subroutine stdlib${ii}$_${ri}$laqz4 #:endif #:endfor #:endfor end submodule stdlib_lapack_eigv_gen3