stdlib_lapack_eigv_gen3.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_gen3
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     module subroutine stdlib${ii}$_slaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info )
     !! SLAQTR solves the real quasi-triangular system
     !! op(T)*p = scale*c,               if LREAL = .TRUE.
     !! or the complex quasi-triangular systems
     !! op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.
     !! in real arithmetic, where T is upper quasi-triangular.
     !! If LREAL = .FALSE., then the first diagonal block of T must be
     !! 1 by 1, B is the specially structured matrix
     !! B = [ b(1) b(2) ... b(n) ]
     !! [       w            ]
     !! [           w        ]
     !! [              .     ]
     !! [                 w  ]
     !! op(A) = A or A**T, A**T denotes the transpose of
     !! matrix A.
     !! On input, X = [ c ].  On output, X = [ p ].
     !! [ d ]                  [ q ]
     !! This subroutine is designed for the condition number estimation
     !! in routine STRSNA.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: lreal, ltran
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldt, n
           real(sp), intent(out) :: scale
           real(sp), intent(in) :: w
           ! Array Arguments 
           real(sp), intent(in) :: b(*), t(ldt,*)
           real(sp), intent(out) :: work(*)
           real(sp), intent(inout) :: x(*)
       ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2
           real(sp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, &
                     xnorm, z
           ! Local Arrays 
           real(sp) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! do not test the input parameters for errors
           notran = .not.ltran
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           ! set constants to control overflow
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           bignum = one / smlnum
           xnorm = stdlib${ii}$_slange( 'M', n, n, t, ldt, d )
           if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_slange( 'M', n, 1_${ik}$, b, n, d ) )
                     
           smin = max( smlnum, eps*xnorm )
           ! compute 1-norm of each column of strictly upper triangular
           ! part of t to control overflow in triangular solver.
           work( 1_${ik}$ ) = zero
           do j = 2, n
              work( j ) = stdlib${ii}$_sasum( j-1, t( 1_${ik}$, j ), 1_${ik}$ )
           end do
           if( .not.lreal ) then
              do i = 2, n
                 work( i ) = work( i ) + abs( b( i ) )
              end do
           end if
           n2 = 2_${ik}$*n
           n1 = n
           if( .not.lreal )n1 = n2
           k = stdlib${ii}$_isamax( n1, x, 1_${ik}$ )
           xmax = abs( x( k ) )
           scale = one
           if( xmax>bignum ) then
              scale = bignum / xmax
              call stdlib${ii}$_sscal( n1, scale, x, 1_${ik}$ )
              xmax = bignum
           end if
           if( lreal ) then
              if( notran ) then
                 ! solve t*p = scale*c
                 jnext = n
                 loop_30: do j = n, 1, -1
                    if( j>jnext )cycle loop_30
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! meet 1 by 1 diagonal block
                       ! scale to avoid overflow when computing
                           ! x(j) = b(j)/t(j,j)
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_30
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xj = abs( x( j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_isamax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       ! call 2 by 2 linear system solve, to take
                       ! care of possible overflow by scaling factor.
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       call stdlib${ii}$_slaln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_sscal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2))
                       ! to avoid overflow in updating right-hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update right-hand side
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_saxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_isamax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    end if
                 end do loop_30
              else
                 ! solve t**t*p = scale*c
                 jnext = 1_${ik}$
                 loop_40: do j = 1, n
                    if( j<jnext )cycle loop_40
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xmax = max( xmax, abs( x( j1 ) ) )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side elements by inner product.
                       xj = max( abs( x( j1 ) ), abs( x( j2 ) ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_sscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       call stdlib${ii}$_slaln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, &
                                 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_sscal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax )
                    end if
                 end do loop_40
              end if
           else
              sminw = max( eps*abs( w ), smin )
              if( notran ) then
                 ! solve (t + ib)*(p+iq) = c+id
                 jnext = n
                 loop_70: do j = n, 1, -1
                    if( j>jnext )cycle loop_70
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in division
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_70
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_sladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si )
                       x( j1 ) = sr
                       x( n+j1 ) = si
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_saxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) )
                          end do
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 )
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 )
                       call stdlib${ii}$_slaln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, &
                                 d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_sscal( 2_${ik}$*n, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       ! scale x(j1), .... to avoid overflow in
                       ! updating right hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )&
                                  ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update the right-hand side.
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_saxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_saxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_saxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          call stdlib${ii}$_saxpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax )
                          end do
                       end if
                    end if
                 end do loop_70
              else
                 ! solve (t + ib)**t*(p+iq) = c+id
                 jnext = 1_${ik}$
                 loop_80: do j = 1, n
                    if( j<jnext )cycle loop_80
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       if( j1>1_${ik}$ ) then
                          x( j1 ) = x( j1 ) - b( j1 )*x( n+1 )
                          x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ )
                       end if
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       ! scale if necessary to avoid overflow in
                       ! complex division
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_sladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si )
                       x( j1 ) = sr
                       x( j1+n ) = si
                       xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) )
                                 
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then
                             call stdlib${ii}$_sscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_sdot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 )
                       d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 )
                       d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ )
                       d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ )
                       call stdlib${ii}$_slaln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_sscal( n2, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )&
                                 , xmax )
                    end if
                 end do loop_80
              end if
           end if
           return
     end subroutine stdlib${ii}$_slaqtr

     module subroutine stdlib${ii}$_dlaqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info )
     !! DLAQTR solves the real quasi-triangular system
     !! op(T)*p = scale*c,               if LREAL = .TRUE.
     !! or the complex quasi-triangular systems
     !! op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.
     !! in real arithmetic, where T is upper quasi-triangular.
     !! If LREAL = .FALSE., then the first diagonal block of T must be
     !! 1 by 1, B is the specially structured matrix
     !! B = [ b(1) b(2) ... b(n) ]
     !! [       w            ]
     !! [           w        ]
     !! [              .     ]
     !! [                 w  ]
     !! op(A) = A or A**T, A**T denotes the transpose of
     !! matrix A.
     !! On input, X = [ c ].  On output, X = [ p ].
     !! [ d ]                  [ q ]
     !! This subroutine is designed for the condition number estimation
     !! in routine DTRSNA.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: lreal, ltran
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldt, n
           real(dp), intent(out) :: scale
           real(dp), intent(in) :: w
           ! Array Arguments 
           real(dp), intent(in) :: b(*), t(ldt,*)
           real(dp), intent(out) :: work(*)
           real(dp), intent(inout) :: x(*)
       ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2
           real(dp) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, &
                     xnorm, z
           ! Local Arrays 
           real(dp) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! do not test the input parameters for errors
           notran = .not.ltran
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           ! set constants to control overflow
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           bignum = one / smlnum
           xnorm = stdlib${ii}$_dlange( 'M', n, n, t, ldt, d )
           if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_dlange( 'M', n, 1_${ik}$, b, n, d ) )
                     
           smin = max( smlnum, eps*xnorm )
           ! compute 1-norm of each column of strictly upper triangular
           ! part of t to control overflow in triangular solver.
           work( 1_${ik}$ ) = zero
           do j = 2, n
              work( j ) = stdlib${ii}$_dasum( j-1, t( 1_${ik}$, j ), 1_${ik}$ )
           end do
           if( .not.lreal ) then
              do i = 2, n
                 work( i ) = work( i ) + abs( b( i ) )
              end do
           end if
           n2 = 2_${ik}$*n
           n1 = n
           if( .not.lreal )n1 = n2
           k = stdlib${ii}$_idamax( n1, x, 1_${ik}$ )
           xmax = abs( x( k ) )
           scale = one
           if( xmax>bignum ) then
              scale = bignum / xmax
              call stdlib${ii}$_dscal( n1, scale, x, 1_${ik}$ )
              xmax = bignum
           end if
           if( lreal ) then
              if( notran ) then
                 ! solve t*p = scale*c
                 jnext = n
                 loop_30: do j = n, 1, -1
                    if( j>jnext )cycle loop_30
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! meet 1 by 1 diagonal block
                       ! scale to avoid overflow when computing
                           ! x(j) = b(j)/t(j,j)
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_30
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xj = abs( x( j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_idamax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       ! call 2 by 2 linear system solve, to take
                       ! care of possible overflow by scaling factor.
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       call stdlib${ii}$_dlaln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_dscal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2))
                       ! to avoid overflow in updating right-hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update right-hand side
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_daxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_idamax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    end if
                 end do loop_30
              else
                 ! solve t**t*p = scale*c
                 jnext = 1_${ik}$
                 loop_40: do j = 1, n
                    if( j<jnext )cycle loop_40
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xmax = max( xmax, abs( x( j1 ) ) )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side elements by inner product.
                       xj = max( abs( x( j1 ) ), abs( x( j2 ) ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_dscal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       call stdlib${ii}$_dlaln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, &
                                 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_dscal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax )
                    end if
                 end do loop_40
              end if
           else
              sminw = max( eps*abs( w ), smin )
              if( notran ) then
                 ! solve (t + ib)*(p+iq) = c+id
                 jnext = n
                 loop_70: do j = n, 1, -1
                    if( j>jnext )cycle loop_70
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in division
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_70
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_dladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si )
                       x( j1 ) = sr
                       x( n+j1 ) = si
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_daxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) )
                          end do
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 )
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 )
                       call stdlib${ii}$_dlaln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, &
                                 d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_dscal( 2_${ik}$*n, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       ! scale x(j1), .... to avoid overflow in
                       ! updating right hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )&
                                  ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update the right-hand side.
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_daxpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_daxpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_daxpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          call stdlib${ii}$_daxpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax )
                          end do
                       end if
                    end if
                 end do loop_70
              else
                 ! solve (t + ib)**t*(p+iq) = c+id
                 jnext = 1_${ik}$
                 loop_80: do j = 1, n
                    if( j<jnext )cycle loop_80
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       if( j1>1_${ik}$ ) then
                          x( j1 ) = x( j1 ) - b( j1 )*x( n+1 )
                          x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ )
                       end if
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       ! scale if necessary to avoid overflow in
                       ! complex division
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_dladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si )
                       x( j1 ) = sr
                       x( j1+n ) = si
                       xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) )
                                 
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then
                             call stdlib${ii}$_dscal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_ddot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 )
                       d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 )
                       d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ )
                       d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ )
                       call stdlib${ii}$_dlaln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_dscal( n2, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )&
                                 , xmax )
                    end if
                 end do loop_80
              end if
           end if
           return
     end subroutine stdlib${ii}$_dlaqtr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$laqtr( ltran, lreal, n, t, ldt, b, w, scale, x, work,info )
     !! DLAQTR: solves the real quasi-triangular system
     !! op(T)*p = scale*c,               if LREAL = .TRUE.
     !! or the complex quasi-triangular systems
     !! op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.
     !! in real arithmetic, where T is upper quasi-triangular.
     !! If LREAL = .FALSE., then the first diagonal block of T must be
     !! 1 by 1, B is the specially structured matrix
     !! B = [ b(1) b(2) ... b(n) ]
     !! [       w            ]
     !! [           w        ]
     !! [              .     ]
     !! [                 w  ]
     !! op(A) = A or A**T, A**T denotes the transpose of
     !! matrix A.
     !! On input, X = [ c ].  On output, X = [ p ].
     !! [ d ]                  [ q ]
     !! This subroutine is designed for the condition number estimation
     !! in routine DTRSNA.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: lreal, ltran
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: ldt, n
           real(${rk}$), intent(out) :: scale
           real(${rk}$), intent(in) :: w
           ! Array Arguments 
           real(${rk}$), intent(in) :: b(*), t(ldt,*)
           real(${rk}$), intent(out) :: work(*)
           real(${rk}$), intent(inout) :: x(*)
       ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, j1, j2, jnext, k, n1, n2
           real(${rk}$) :: bignum, eps, rec, scaloc, si, smin, sminw, smlnum, sr, tjj, tmp, xj, xmax, &
                     xnorm, z
           ! Local Arrays 
           real(${rk}$) :: d(2_${ik}$,2_${ik}$), v(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! do not test the input parameters for errors
           notran = .not.ltran
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           ! set constants to control overflow
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / eps
           bignum = one / smlnum
           xnorm = stdlib${ii}$_${ri}$lange( 'M', n, n, t, ldt, d )
           if( .not.lreal )xnorm = max( xnorm, abs( w ), stdlib${ii}$_${ri}$lange( 'M', n, 1_${ik}$, b, n, d ) )
                     
           smin = max( smlnum, eps*xnorm )
           ! compute 1-norm of each column of strictly upper triangular
           ! part of t to control overflow in triangular solver.
           work( 1_${ik}$ ) = zero
           do j = 2, n
              work( j ) = stdlib${ii}$_${ri}$asum( j-1, t( 1_${ik}$, j ), 1_${ik}$ )
           end do
           if( .not.lreal ) then
              do i = 2, n
                 work( i ) = work( i ) + abs( b( i ) )
              end do
           end if
           n2 = 2_${ik}$*n
           n1 = n
           if( .not.lreal )n1 = n2
           k = stdlib${ii}$_i${ri}$amax( n1, x, 1_${ik}$ )
           xmax = abs( x( k ) )
           scale = one
           if( xmax>bignum ) then
              scale = bignum / xmax
              call stdlib${ii}$_${ri}$scal( n1, scale, x, 1_${ik}$ )
              xmax = bignum
           end if
           if( lreal ) then
              if( notran ) then
                 ! solve t*p = scale*c
                 jnext = n
                 loop_30: do j = n, 1, -1
                    if( j>jnext )cycle loop_30
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! meet 1 by 1 diagonal block
                       ! scale to avoid overflow when computing
                           ! x(j) = b(j)/t(j,j)
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_30
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xj = abs( x( j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_i${ri}$amax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       ! call 2 by 2 linear system solve, to take
                       ! care of possible overflow by scaling factor.
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       call stdlib${ii}$_${ri}$laln2( .false., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_${ri}$scal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       ! scale v(1,1) (= x(j1)) and/or v(2,1) (=x(j2))
                       ! to avoid overflow in updating right-hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) ), abs( v( 2_${ik}$, 1_${ik}$ ) ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update right-hand side
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          k = stdlib${ii}$_i${ri}$amax( j1-1, x, 1_${ik}$ )
                          xmax = abs( x( k ) )
                       end if
                    end if
                 end do loop_30
              else
                 ! solve t**t*p = scale*c
                 jnext = 1_${ik}$
                 loop_40: do j = 1, n
                    if( j<jnext )cycle loop_40
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       xj = abs( x( j1 ) )
                       tjj = abs( t( j1, j1 ) )
                       tmp = t( j1, j1 )
                       if( tjj<smin ) then
                          tmp = smin
                          tjj = smin
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) / tmp
                       xmax = max( xmax, abs( x( j1 ) ) )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side elements by inner product.
                       xj = max( abs( x( j1 ) ), abs( x( j2 ) ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j2 ), work( j1 ) )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       call stdlib${ii}$_${ri}$laln2( .true., 2_${ik}$, 1_${ik}$, smin, one, t( j1, j1 ),ldt, one, one, d, &
                                 2_${ik}$, zero, zero, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_${ri}$scal( n, scaloc, x, 1_${ik}$ )
                          scale = scale*scaloc
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       xmax = max( abs( x( j1 ) ), abs( x( j2 ) ), xmax )
                    end if
                 end do loop_40
              end if
           else
              sminw = max( eps*abs( w ), smin )
              if( notran ) then
                 ! solve (t + ib)*(p+iq) = c+id
                 jnext = n
                 loop_70: do j = n, 1, -1
                    if( j>jnext )cycle loop_70
                    j1 = j
                    j2 = j
                    jnext = j - 1_${ik}$
                    if( j>1_${ik}$ ) then
                       if( t( j, j-1 )/=zero ) then
                          j1 = j - 1_${ik}$
                          jnext = j - 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in division
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( xj==zero )cycle loop_70
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_${ri}$ladiv( x( j1 ), x( n+j1 ), tmp, z, sr, si )
                       x( j1 ) = sr
                       x( n+j1 ) = si
                       xj = abs( x( j1 ) ) + abs( x( n+j1 ) )
                       ! scale x if necessary to avoid overflow when adding a
                       ! multiple of column j1 of t.
                       if( xj>one ) then
                          rec = one / xj
                          if( work( j1 )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( xmax, abs( x( k ) )+abs( x( k+n ) ) )
                          end do
                       end if
                    else
                       ! meet 2 by 2 diagonal block
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 )
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 )
                       call stdlib${ii}$_${ri}$laln2( .false., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, &
                                 d, 2_${ik}$, zero, -w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_${ri}$scal( 2_${ik}$*n, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       ! scale x(j1), .... to avoid overflow in
                       ! updating right hand side.
                       xj = max( abs( v( 1_${ik}$, 1_${ik}$ ) )+abs( v( 1_${ik}$, 2_${ik}$ ) ),abs( v( 2_${ik}$, 1_${ik}$ ) )+abs( v( 2_${ik}$, 2_${ik}$ )&
                                  ) )
                       if( xj>one ) then
                          rec = one / xj
                          if( max( work( j1 ), work( j2 ) )>( bignum-xmax )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                          end if
                       end if
                       ! update the right-hand side.
                       if( j1>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j1 ), t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( j2 ), t( 1_${ik}$, j2 ), 1_${ik}$, x, 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j1 ), t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( j1-1, -x( n+j2 ), t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                          x( 1_${ik}$ ) = x( 1_${ik}$ ) + b( j1 )*x( n+j1 ) +b( j2 )*x( n+j2 )
                          x( n+1 ) = x( n+1 ) - b( j1 )*x( j1 ) -b( j2 )*x( j2 )
                          xmax = zero
                          do k = 1, j1 - 1
                             xmax = max( abs( x( k ) )+abs( x( k+n ) ),xmax )
                          end do
                       end if
                    end if
                 end do loop_70
              else
                 ! solve (t + ib)**t*(p+iq) = c+id
                 jnext = 1_${ik}$
                 loop_80: do j = 1, n
                    if( j<jnext )cycle loop_80
                    j1 = j
                    j2 = j
                    jnext = j + 1_${ik}$
                    if( j<n ) then
                       if( t( j+1, j )/=zero ) then
                          j2 = j + 1_${ik}$
                          jnext = j + 2_${ik}$
                       end if
                    end if
                    if( j1==j2 ) then
                       ! 1 by 1 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       if( xmax>one ) then
                          rec = one / xmax
                          if( work( j1 )>( bignum-xj )*rec ) then
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       x( j1 ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x, 1_${ik}$ )
                       x( n+j1 ) = x( n+j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       if( j1>1_${ik}$ ) then
                          x( j1 ) = x( j1 ) - b( j1 )*x( n+1 )
                          x( n+j1 ) = x( n+j1 ) + b( j1 )*x( 1_${ik}$ )
                       end if
                       xj = abs( x( j1 ) ) + abs( x( j1+n ) )
                       z = w
                       if( j1==1_${ik}$ )z = b( 1_${ik}$ )
                       ! scale if necessary to avoid overflow in
                       ! complex division
                       tjj = abs( t( j1, j1 ) ) + abs( z )
                       tmp = t( j1, j1 )
                       if( tjj<sminw ) then
                          tmp = sminw
                          tjj = sminw
                          info = 1_${ik}$
                       end if
                       if( tjj<one ) then
                          if( xj>bignum*tjj ) then
                             rec = one / xj
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       call stdlib${ii}$_${ri}$ladiv( x( j1 ), x( n+j1 ), tmp, -z, sr, si )
                       x( j1 ) = sr
                       x( j1+n ) = si
                       xmax = max( abs( x( j1 ) )+abs( x( j1+n ) ), xmax )
                    else
                       ! 2 by 2 diagonal block
                       ! scale if necessary to avoid overflow in forming the
                       ! right-hand side element by inner product.
                       xj = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) ) )
                                 
                       if( xmax>one ) then
                          rec = one / xmax
                          if( max( work( j1 ), work( j2 ) )>( bignum-xj ) / xmax ) then
                             call stdlib${ii}$_${ri}$scal( n2, rec, x, 1_${ik}$ )
                             scale = scale*rec
                             xmax = xmax*rec
                          end if
                       end if
                       d( 1_${ik}$, 1_${ik}$ ) = x( j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$, x,1_${ik}$ )
                       d( 2_${ik}$, 1_${ik}$ ) = x( j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$, x,1_${ik}$ )
                       d( 1_${ik}$, 2_${ik}$ ) = x( n+j1 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j1 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 2_${ik}$, 2_${ik}$ ) = x( n+j2 ) - stdlib${ii}$_${ri}$dot( j1-1, t( 1_${ik}$, j2 ), 1_${ik}$,x( n+1 ), 1_${ik}$ )
                                 
                       d( 1_${ik}$, 1_${ik}$ ) = d( 1_${ik}$, 1_${ik}$ ) - b( j1 )*x( n+1 )
                       d( 2_${ik}$, 1_${ik}$ ) = d( 2_${ik}$, 1_${ik}$ ) - b( j2 )*x( n+1 )
                       d( 1_${ik}$, 2_${ik}$ ) = d( 1_${ik}$, 2_${ik}$ ) + b( j1 )*x( 1_${ik}$ )
                       d( 2_${ik}$, 2_${ik}$ ) = d( 2_${ik}$, 2_${ik}$ ) + b( j2 )*x( 1_${ik}$ )
                       call stdlib${ii}$_${ri}$laln2( .true., 2_${ik}$, 2_${ik}$, sminw, one, t( j1, j1 ),ldt, one, one, d,&
                                  2_${ik}$, zero, w, v, 2_${ik}$,scaloc, xnorm, ierr )
                       if( ierr/=0_${ik}$ )info = 2_${ik}$
                       if( scaloc/=one ) then
                          call stdlib${ii}$_${ri}$scal( n2, scaloc, x, 1_${ik}$ )
                          scale = scaloc*scale
                       end if
                       x( j1 ) = v( 1_${ik}$, 1_${ik}$ )
                       x( j2 ) = v( 2_${ik}$, 1_${ik}$ )
                       x( n+j1 ) = v( 1_${ik}$, 2_${ik}$ )
                       x( n+j2 ) = v( 2_${ik}$, 2_${ik}$ )
                       xmax = max( abs( x( j1 ) )+abs( x( n+j1 ) ),abs( x( j2 ) )+abs( x( n+j2 ) )&
                                 , xmax )
                    end if
                 end do loop_80
              end if
           end if
           return
     end subroutine stdlib${ii}$_${ri}$laqtr

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, &
     !! SLAHQR is an auxiliary routine called by SHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by SHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(sp), intent(out) :: wi(*), wr(*)
        ! =========================================================
           ! Parameters 
           real(sp), parameter :: dat1 = 3.0_sp/4.0_sp
           real(sp), parameter :: dat2 = -0.4375_sp
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           
           
           ! Local Scalars 
           real(sp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, &
                     rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3
           integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl
           ! Local Arrays 
           real(sp) :: v(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              wr( ilo ) = h( ilo, ilo )
              wi( ilo ) = zero
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = zero
              h( j+3, j ) = zero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=sp) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1 or 2. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           20 continue
           l = ilo
           if( i<ilo )go to 160
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 or 2 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           loop_140: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( abs( h( k, k-1 ) )<=smlnum )go to 40
                 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) )
                 if( tst==zero ) then
                    if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) )
                    if( k+1<=ihi )tst = tst + abs( h( k+1, k ) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation  criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some cases.  ====
                 if( abs( h( k, k-1 ) )<=ulp*tst ) then
                    ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40
                 end if
              end do
              40 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = zero
              end if
              ! exit from loop if a submatrix of order 1 or 2 has split off.
              if( l>=i-1 )go to 150
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                 h11 = dat1*s + h( i, i )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) )
                 h11 = dat1*s + h( l, l )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else
                 ! prepare to use francis' double shift
                 ! (i.e. 2nd degree generalized rayleigh quotient)
                 h11 = h( i-1, i-1 )
                 h21 = h( i, i-1 )
                 h12 = h( i-1, i )
                 h22 = h( i, i )
              end if
              s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 )
              if( s==zero ) then
                 rt1r = zero
                 rt1i = zero
                 rt2r = zero
                 rt2i = zero
              else
                 h11 = h11 / s
                 h21 = h21 / s
                 h12 = h12 / s
                 h22 = h22 / s
                 tr = ( h11+h22 ) / two
                 det = ( h11-tr )*( h22-tr ) - h12*h21
                 rtdisc = sqrt( abs( det ) )
                 if( det>=zero ) then
                    ! ==== complex conjugate shifts ====
                    rt1r = tr*s
                    rt2r = rt1r
                    rt1i = rtdisc*s
                    rt2i = -rt1i
                 else
                    ! ==== realshifts (use only one of them,KIND=sp)  ====
                    rt1r = tr + rtdisc
                    rt2r = tr - rtdisc
                    if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then
                       rt1r = rt1r*s
                       rt2r = rt1r
                    else
                       rt2r = rt2r*s
                       rt1r = rt2r
                    end if
                    rt1i = zero
                    rt2i = zero
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 2, l, -1
                 ! determine the effect of starting the double-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.  (the following uses scaling to avoid
                 ! overflows and most underflows.)
                 h21s = h( m+1, m )
                 s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s )
                 h21s = h( m+1, m ) / s
                 v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - &
                           rt1i*( rt2i / s )
                 v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r )
                 v( 3_${ik}$ ) = h21s*h( m+2, m+1 )
                 s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) )
                 v( 1_${ik}$ ) = v( 1_${ik}$ ) / s
                 v( 2_${ik}$ ) = v( 2_${ik}$ ) / s
                 v( 3_${ik}$ ) = v( 3_${ik}$ ) / s
                 if( m==l )go to 60
                 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( &
                           h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60
              end do
              60 continue
              ! double-shift qr step
              loop_130: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge one step toward the bottom of the active
                 ! submatrix. nr is the order of g.
                 nr = min( 3_${ik}$, i-k+1 )
                 if( k>m )call stdlib${ii}$_scopy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_slarfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = zero
                    if( k<i-1 )h( k+2, k-1 ) = zero
                 else if( m>l ) then
                     ! ==== use the following instead of
                     ! .    h( k, k-1 ) = -h( k, k-1 ) to
                     ! .    avoid a bug when v(2) and v(3)
                     ! .    underflow. ====
                    h( k, k-1 ) = h( k, k-1 )*( one-t1 )
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = t1*v2
                 if( nr==3_${ik}$ ) then
                    v3 = v( 3_${ik}$ )
                    t3 = t1*v3
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                       h( k+2, j ) = h( k+2, j ) - sum*t3
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, min( k+3, i )
                       sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                       h( j, k+2 ) = h( j, k+2 ) - sum*t3
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                          z( j, k+2 ) = z( j, k+2 ) - sum*t3
                       end do
                    end if
                 else if( nr==2_${ik}$ ) then
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, i
                       sum = h( j, k ) + v2*h( j, k+1 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                       end do
                    end if
                 end if
              end do loop_130
           end do loop_140
           ! failure to converge in remaining number of iterations
           info = i
           return
           150 continue
           if( l==i ) then
              ! h(i,i-1) is negligible: one eigenvalue has converged.
              wr( i ) = h( i, i )
              wi( i ) = zero
           else if( l==i-1 ) then
              ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged.
              ! transform the 2-by-2 submatrix to standard schur form,
              ! and compute and store the eigenvalues.
              call stdlib${ii}$_slanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), &
                        wi( i-1 ), wr( i ), wi( i ),cs, sn )
              if( wantt ) then
                 ! apply the transformation to the rest of h.
                 if( i2>i )call stdlib${ii}$_srot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn )
                           
                 call stdlib${ii}$_srot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn )
              end if
              if( wantz ) then
                 ! apply the transformation to z.
                 call stdlib${ii}$_srot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn )
              end if
           end if
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 20
           160 continue
           return
     end subroutine stdlib${ii}$_slahqr

     pure module subroutine stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, &
     !! DLAHQR is an auxiliary routine called by DHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by DHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(dp), intent(out) :: wi(*), wr(*)
        ! =========================================================
           ! Parameters 
           real(dp), parameter :: dat1 = 3.0_dp/4.0_dp
           real(dp), parameter :: dat2 = -0.4375_dp
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           
           
           ! Local Scalars 
           real(dp) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, &
                     rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3
           integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl
           ! Local Arrays 
           real(dp) :: v(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              wr( ilo ) = h( ilo, ilo )
              wi( ilo ) = zero
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = zero
              h( j+3, j ) = zero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=dp) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1 or 2. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           20 continue
           l = ilo
           if( i<ilo )go to 160
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 or 2 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           loop_140: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( abs( h( k, k-1 ) )<=smlnum )go to 40
                 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) )
                 if( tst==zero ) then
                    if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) )
                    if( k+1<=ihi )tst = tst + abs( h( k+1, k ) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation  criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some cases.  ====
                 if( abs( h( k, k-1 ) )<=ulp*tst ) then
                    ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40
                 end if
              end do
              40 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = zero
              end if
              ! exit from loop if a submatrix of order 1 or 2 has split off.
              if( l>=i-1 )go to 150
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                 h11 = dat1*s + h( i, i )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) )
                 h11 = dat1*s + h( l, l )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else
                 ! prepare to use francis' double shift
                 ! (i.e. 2nd degree generalized rayleigh quotient)
                 h11 = h( i-1, i-1 )
                 h21 = h( i, i-1 )
                 h12 = h( i-1, i )
                 h22 = h( i, i )
              end if
              s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 )
              if( s==zero ) then
                 rt1r = zero
                 rt1i = zero
                 rt2r = zero
                 rt2i = zero
              else
                 h11 = h11 / s
                 h21 = h21 / s
                 h12 = h12 / s
                 h22 = h22 / s
                 tr = ( h11+h22 ) / two
                 det = ( h11-tr )*( h22-tr ) - h12*h21
                 rtdisc = sqrt( abs( det ) )
                 if( det>=zero ) then
                    ! ==== complex conjugate shifts ====
                    rt1r = tr*s
                    rt2r = rt1r
                    rt1i = rtdisc*s
                    rt2i = -rt1i
                 else
                    ! ==== realshifts (use only one of them,KIND=dp)  ====
                    rt1r = tr + rtdisc
                    rt2r = tr - rtdisc
                    if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then
                       rt1r = rt1r*s
                       rt2r = rt1r
                    else
                       rt2r = rt2r*s
                       rt1r = rt2r
                    end if
                    rt1i = zero
                    rt2i = zero
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 2, l, -1
                 ! determine the effect of starting the double-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.  (the following uses scaling to avoid
                 ! overflows and most underflows.)
                 h21s = h( m+1, m )
                 s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s )
                 h21s = h( m+1, m ) / s
                 v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - &
                           rt1i*( rt2i / s )
                 v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r )
                 v( 3_${ik}$ ) = h21s*h( m+2, m+1 )
                 s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) )
                 v( 1_${ik}$ ) = v( 1_${ik}$ ) / s
                 v( 2_${ik}$ ) = v( 2_${ik}$ ) / s
                 v( 3_${ik}$ ) = v( 3_${ik}$ ) / s
                 if( m==l )go to 60
                 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( &
                           h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60
              end do
              60 continue
              ! double-shift qr step
              loop_130: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge one step toward the bottom of the active
                 ! submatrix. nr is the order of g.
                 nr = min( 3_${ik}$, i-k+1 )
                 if( k>m )call stdlib${ii}$_dcopy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_dlarfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = zero
                    if( k<i-1 )h( k+2, k-1 ) = zero
                 else if( m>l ) then
                     ! ==== use the following instead of
                     ! .    h( k, k-1 ) = -h( k, k-1 ) to
                     ! .    avoid a bug when v(2) and v(3)
                     ! .    underflow. ====
                    h( k, k-1 ) = h( k, k-1 )*( one-t1 )
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = t1*v2
                 if( nr==3_${ik}$ ) then
                    v3 = v( 3_${ik}$ )
                    t3 = t1*v3
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                       h( k+2, j ) = h( k+2, j ) - sum*t3
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, min( k+3, i )
                       sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                       h( j, k+2 ) = h( j, k+2 ) - sum*t3
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                          z( j, k+2 ) = z( j, k+2 ) - sum*t3
                       end do
                    end if
                 else if( nr==2_${ik}$ ) then
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, i
                       sum = h( j, k ) + v2*h( j, k+1 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                       end do
                    end if
                 end if
              end do loop_130
           end do loop_140
           ! failure to converge in remaining number of iterations
           info = i
           return
           150 continue
           if( l==i ) then
              ! h(i,i-1) is negligible: one eigenvalue has converged.
              wr( i ) = h( i, i )
              wi( i ) = zero
           else if( l==i-1 ) then
              ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged.
              ! transform the 2-by-2 submatrix to standard schur form,
              ! and compute and store the eigenvalues.
              call stdlib${ii}$_dlanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), &
                        wi( i-1 ), wr( i ), wi( i ),cs, sn )
              if( wantt ) then
                 ! apply the transformation to the rest of h.
                 if( i2>i )call stdlib${ii}$_drot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn )
                           
                 call stdlib${ii}$_drot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn )
              end if
              if( wantz ) then
                 ! apply the transformation to z.
                 call stdlib${ii}$_drot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn )
              end if
           end if
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 20
           160 continue
           return
     end subroutine stdlib${ii}$_dlahqr

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, &
     !! DLAHQR: is an auxiliary routine called by DHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by DHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*)
           real(${rk}$), intent(out) :: wi(*), wr(*)
        ! =========================================================
           ! Parameters 
           real(${rk}$), parameter :: dat1 = 3.0_${rk}$/4.0_${rk}$
           real(${rk}$), parameter :: dat2 = -0.4375_${rk}$
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           
           
           ! Local Scalars 
           real(${rk}$) :: aa, ab, ba, bb, cs, det, h11, h12, h21, h21s, h22, rt1i, rt1r, rt2i, rt2r, &
                     rtdisc, s, safmax, safmin, smlnum, sn, sum, t1, t2, t3, tr, tst, ulp, v2, v3
           integer(${ik}$) :: i, i1, i2, its, itmax, j, k, l, m, nh, nr, nz, kdefl
           ! Local Arrays 
           real(${rk}$) :: v(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              wr( ilo ) = h( ilo, ilo )
              wi( ilo ) = zero
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = zero
              h( j+3, j ) = zero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = zero
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=${rk}$) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1 or 2. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           20 continue
           l = ilo
           if( i<ilo )go to 160
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 or 2 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           loop_140: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( abs( h( k, k-1 ) )<=smlnum )go to 40
                 tst = abs( h( k-1, k-1 ) ) + abs( h( k, k ) )
                 if( tst==zero ) then
                    if( k-2>=ilo )tst = tst + abs( h( k-1, k-2 ) )
                    if( k+1<=ihi )tst = tst + abs( h( k+1, k ) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation  criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some cases.  ====
                 if( abs( h( k, k-1 ) )<=ulp*tst ) then
                    ab = max( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    ba = min( abs( h( k, k-1 ) ), abs( h( k-1, k ) ) )
                    aa = max( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( abs( h( k, k ) ),abs( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 40
                 end if
              end do
              40 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = zero
              end if
              ! exit from loop if a submatrix of order 1 or 2 has split off.
              if( l>=i-1 )go to 150
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                 h11 = dat1*s + h( i, i )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = abs( h( l+1, l ) ) + abs( h( l+2, l+1 ) )
                 h11 = dat1*s + h( l, l )
                 h12 = dat2*s
                 h21 = s
                 h22 = h11
              else
                 ! prepare to use francis' double shift
                 ! (i.e. 2nd degree generalized rayleigh quotient)
                 h11 = h( i-1, i-1 )
                 h21 = h( i, i-1 )
                 h12 = h( i-1, i )
                 h22 = h( i, i )
              end if
              s = abs( h11 ) + abs( h12 ) + abs( h21 ) + abs( h22 )
              if( s==zero ) then
                 rt1r = zero
                 rt1i = zero
                 rt2r = zero
                 rt2i = zero
              else
                 h11 = h11 / s
                 h21 = h21 / s
                 h12 = h12 / s
                 h22 = h22 / s
                 tr = ( h11+h22 ) / two
                 det = ( h11-tr )*( h22-tr ) - h12*h21
                 rtdisc = sqrt( abs( det ) )
                 if( det>=zero ) then
                    ! ==== complex conjugate shifts ====
                    rt1r = tr*s
                    rt2r = rt1r
                    rt1i = rtdisc*s
                    rt2i = -rt1i
                 else
                    ! ==== realshifts (use only one of them,KIND=${rk}$)  ====
                    rt1r = tr + rtdisc
                    rt2r = tr - rtdisc
                    if( abs( rt1r-h22 )<=abs( rt2r-h22 ) ) then
                       rt1r = rt1r*s
                       rt2r = rt1r
                    else
                       rt2r = rt2r*s
                       rt1r = rt2r
                    end if
                    rt1i = zero
                    rt2i = zero
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 2, l, -1
                 ! determine the effect of starting the double-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.  (the following uses scaling to avoid
                 ! overflows and most underflows.)
                 h21s = h( m+1, m )
                 s = abs( h( m, m )-rt2r ) + abs( rt2i ) + abs( h21s )
                 h21s = h( m+1, m ) / s
                 v( 1_${ik}$ ) = h21s*h( m, m+1 ) + ( h( m, m )-rt1r )*( ( h( m, m )-rt2r ) / s ) - &
                           rt1i*( rt2i / s )
                 v( 2_${ik}$ ) = h21s*( h( m, m )+h( m+1, m+1 )-rt1r-rt2r )
                 v( 3_${ik}$ ) = h21s*h( m+2, m+1 )
                 s = abs( v( 1_${ik}$ ) ) + abs( v( 2_${ik}$ ) ) + abs( v( 3_${ik}$ ) )
                 v( 1_${ik}$ ) = v( 1_${ik}$ ) / s
                 v( 2_${ik}$ ) = v( 2_${ik}$ ) / s
                 v( 3_${ik}$ ) = v( 3_${ik}$ ) / s
                 if( m==l )go to 60
                 if( abs( h( m, m-1 ) )*( abs( v( 2_${ik}$ ) )+abs( v( 3_${ik}$ ) ) )<=ulp*abs( v( 1_${ik}$ ) )*( abs( &
                           h( m-1, m-1 ) )+abs( h( m,m ) )+abs( h( m+1, m+1 ) ) ) )go to 60
              end do
              60 continue
              ! double-shift qr step
              loop_130: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge one step toward the bottom of the active
                 ! submatrix. nr is the order of g.
                 nr = min( 3_${ik}$, i-k+1 )
                 if( k>m )call stdlib${ii}$_${ri}$copy( nr, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_${ri}$larfg( nr, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = zero
                    if( k<i-1 )h( k+2, k-1 ) = zero
                 else if( m>l ) then
                     ! ==== use the following instead of
                     ! .    h( k, k-1 ) = -h( k, k-1 ) to
                     ! .    avoid a bug when v(2) and v(3)
                     ! .    underflow. ====
                    h( k, k-1 ) = h( k, k-1 )*( one-t1 )
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = t1*v2
                 if( nr==3_${ik}$ ) then
                    v3 = v( 3_${ik}$ )
                    t3 = t1*v3
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j ) + v3*h( k+2, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                       h( k+2, j ) = h( k+2, j ) - sum*t3
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, min( k+3, i )
                       sum = h( j, k ) + v2*h( j, k+1 ) + v3*h( j, k+2 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                       h( j, k+2 ) = h( j, k+2 ) - sum*t3
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 ) + v3*z( j, k+2 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                          z( j, k+2 ) = z( j, k+2 ) - sum*t3
                       end do
                    end if
                 else if( nr==2_${ik}$ ) then
                    ! apply g from the left to transform the rows of the matrix
                    ! in columns k to i2.
                    do j = k, i2
                       sum = h( k, j ) + v2*h( k+1, j )
                       h( k, j ) = h( k, j ) - sum*t1
                       h( k+1, j ) = h( k+1, j ) - sum*t2
                    end do
                    ! apply g from the right to transform the columns of the
                    ! matrix in rows i1 to min(k+3,i).
                    do j = i1, i
                       sum = h( j, k ) + v2*h( j, k+1 )
                       h( j, k ) = h( j, k ) - sum*t1
                       h( j, k+1 ) = h( j, k+1 ) - sum*t2
                    end do
                    if( wantz ) then
                       ! accumulate transformations in the matrix z
                       do j = iloz, ihiz
                          sum = z( j, k ) + v2*z( j, k+1 )
                          z( j, k ) = z( j, k ) - sum*t1
                          z( j, k+1 ) = z( j, k+1 ) - sum*t2
                       end do
                    end if
                 end if
              end do loop_130
           end do loop_140
           ! failure to converge in remaining number of iterations
           info = i
           return
           150 continue
           if( l==i ) then
              ! h(i,i-1) is negligible: one eigenvalue has converged.
              wr( i ) = h( i, i )
              wi( i ) = zero
           else if( l==i-1 ) then
              ! h(i-1,i-2) is negligible: a pair of eigenvalues have converged.
              ! transform the 2-by-2 submatrix to standard schur form,
              ! and compute and store the eigenvalues.
              call stdlib${ii}$_${ri}$lanv2( h( i-1, i-1 ), h( i-1, i ), h( i, i-1 ),h( i, i ), wr( i-1 ), &
                        wi( i-1 ), wr( i ), wi( i ),cs, sn )
              if( wantt ) then
                 ! apply the transformation to the rest of h.
                 if( i2>i )call stdlib${ii}$_${ri}$rot( i2-i, h( i-1, i+1 ), ldh, h( i, i+1 ), ldh,cs, sn )
                           
                 call stdlib${ii}$_${ri}$rot( i-i1-1, h( i1, i-1 ), 1_${ik}$, h( i1, i ), 1_${ik}$, cs, sn )
              end if
              if( wantz ) then
                 ! apply the transformation to z.
                 call stdlib${ii}$_${ri}$rot( nz, z( iloz, i-1 ), 1_${ik}$, z( iloz, i ), 1_${ik}$, cs, sn )
              end if
           end if
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 20
           160 continue
           return
     end subroutine stdlib${ii}$_${ri}$lahqr

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info &
     !! CLAHQR is an auxiliary routine called by CHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by CHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(sp), intent(out) :: w(*)
        ! =========================================================
           ! Parameters 
           real(sp), parameter :: dat1 = 3.0_sp/4.0_sp
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           
           
           
           ! Local Scalars 
           complex(sp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y
           real(sp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, &
                     ulp
           integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl
           ! Local Arrays 
           complex(sp) :: v(2_${ik}$)
           ! Statement Functions 
           real(sp) :: cabs1
           ! Intrinsic Functions 
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              w( ilo ) = h( ilo, ilo )
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = czero
              h( j+3, j ) = czero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero
           ! ==== ensure that subdiagonal entries are real ====
           if( wantt ) then
              jlo = 1_${ik}$
              jhi = n
           else
              jlo = ilo
              jhi = ihi
           end if
           do i = ilo + 1, ihi
              if( aimag( h( i, i-1 ) )/=zero ) then
                 ! ==== the following redundant normalization
                 ! .    avoids problems with both gradual and
                 ! .    sudden underflow in abs(h(i,i-1)) ====
                 sc = h( i, i-1 ) / cabs1( h( i, i-1 ) )
                 sc = conjg( sc ) / abs( sc )
                 h( i, i-1 ) = abs( h( i, i-1 ) )
                 call stdlib${ii}$_cscal( jhi-i+1, sc, h( i, i ), ldh )
                 call stdlib${ii}$_cscal( min( jhi, i+1 )-jlo+1, conjg( sc ), h( jlo, i ),1_${ik}$ )
                 if( wantz )call stdlib${ii}$_cscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ )
              end if
           end do
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=sp) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo, or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           30 continue
           if( i<ilo )go to 150
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           l = ilo
           loop_130: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50
                 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) )
                 if( tst==czero ) then
                    if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=sp) )
                    if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=sp) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some examples.  ====
                 if( abs( real( h( k, k-1 ),KIND=sp) )<=ulp*tst ) then
                    ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50
                 end if
              end do
              50 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = czero
              end if
              ! exit from loop if a submatrix of order 1 has split off.
              if( l>=i )go to 140
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( i, i-1 ),KIND=sp) )
                 t = s + h( i, i )
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( l+1, l ),KIND=sp) )
                 t = s + h( l, l )
              else
                 ! wilkinson's shift.
                 t = h( i, i )
                 u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) )
                 s = cabs1( u )
                 if( s/=zero ) then
                    x = half*( h( i-1, i-1 )-t )
                    sx = cabs1( x )
                    s = max( s, cabs1( x ) )
                    y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ )
                    if( sx>zero ) then
                       if( real( x / sx,KIND=sp)*real( y,KIND=sp)+aimag( x / sx )*aimag( y )&
                                 <zero )y = -y
                    end if
                    t = t - u*stdlib${ii}$_cladiv( u, ( x+y ) )
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 1, l + 1, -1
                 ! determine the effect of starting the single-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.
                 h11 = h( m, m )
                 h22 = h( m+1, m+1 )
                 h11s = h11 - t
                 h21 = real( h( m+1, m ),KIND=sp)
                 s = cabs1( h11s ) + abs( h21 )
                 h11s = h11s / s
                 h21 = h21 / s
                 v( 1_${ik}$ ) = h11s
                 v( 2_${ik}$ ) = h21
                 h10 = real( h( m, m-1 ),KIND=sp)
                 if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )&
                           go to 70
              end do
              h11 = h( l, l )
              h22 = h( l+1, l+1 )
              h11s = h11 - t
              h21 = real( h( l+1, l ),KIND=sp)
              s = cabs1( h11s ) + abs( h21 )
              h11s = h11s / s
              h21 = h21 / s
              v( 1_${ik}$ ) = h11s
              v( 2_${ik}$ ) = h21
              70 continue
              ! single-shift qr step
              loop_120: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge cone step toward the bottom of the active
                 ! submatrix.
                 ! v(2) is always real before the call to stdlib${ii}$_clarfg, and hence
                 ! after the call t2 ( = t1*v(2) ) is also real.
                 if( k>m )call stdlib${ii}$_ccopy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_clarfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = czero
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = real( t1*v2,KIND=sp)
                 ! apply g from the left to transform the rows of the matrix
                 ! in columns k to i2.
                 do j = k, i2
                    sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j )
                    h( k, j ) = h( k, j ) - sum
                    h( k+1, j ) = h( k+1, j ) - sum*v2
                 end do
                 ! apply g from the right to transform the columns of the
                 ! matrix in rows i1 to min(k+2,i).
                 do j = i1, min( k+2, i )
                    sum = t1*h( j, k ) + t2*h( j, k+1 )
                    h( j, k ) = h( j, k ) - sum
                    h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 )
                 end do
                 if( wantz ) then
                    ! accumulate transformations in the matrix z
                    do j = iloz, ihiz
                       sum = t1*z( j, k ) + t2*z( j, k+1 )
                       z( j, k ) = z( j, k ) - sum
                       z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 )
                    end do
                 end if
                 if( k==m .and. m>l ) then
                    ! if the qr step was started at row m > l because two
                    ! consecutive small subdiagonals were found, then extra
                    ! scaling must be performed to ensure that h(m,m-1) remains
                    ! real.
                    temp = cone - t1
                    temp = temp / abs( temp )
                    h( m+1, m ) = h( m+1, m )*conjg( temp )
                    if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp
                    do j = m, i
                       if( j/=m+1 ) then
                          if( i2>j )call stdlib${ii}$_cscal( i2-j, temp, h( j, j+1 ), ldh )
                          call stdlib${ii}$_cscal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ )
                          if( wantz ) then
                             call stdlib${ii}$_cscal( nz, conjg( temp ), z( iloz, j ), 1_${ik}$ )
                          end if
                       end if
                    end do
                 end if
              end do loop_120
              ! ensure that h(i,i-1) is real.
              temp = h( i, i-1 )
              if( aimag( temp )/=zero ) then
                 rtemp = abs( temp )
                 h( i, i-1 ) = rtemp
                 temp = temp / rtemp
                 if( i2>i )call stdlib${ii}$_cscal( i2-i, conjg( temp ), h( i, i+1 ), ldh )
                 call stdlib${ii}$_cscal( i-i1, temp, h( i1, i ), 1_${ik}$ )
                 if( wantz ) then
                    call stdlib${ii}$_cscal( nz, temp, z( iloz, i ), 1_${ik}$ )
                 end if
              end if
           end do loop_130
           ! failure to converge in remaining number of iterations
           info = i
           return
           140 continue
           ! h(i,i-1) is negligible: cone eigenvalue has converged.
           w( i ) = h( i, i )
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 30
           150 continue
           return
     end subroutine stdlib${ii}$_clahqr

     pure module subroutine stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info &
     !! ZLAHQR is an auxiliary routine called by CHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by CHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(dp), intent(out) :: w(*)
        ! =========================================================
           ! Parameters 
           real(dp), parameter :: dat1 = 3.0_dp/4.0_dp
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           
           
           
           ! Local Scalars 
           complex(dp) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y
           real(dp) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, &
                     ulp
           integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl
           ! Local Arrays 
           complex(dp) :: v(2_${ik}$)
           ! Statement Functions 
           real(dp) :: cabs1
           ! Intrinsic Functions 
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              w( ilo ) = h( ilo, ilo )
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = czero
              h( j+3, j ) = czero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero
           ! ==== ensure that subdiagonal entries are real ====
           if( wantt ) then
              jlo = 1_${ik}$
              jhi = n
           else
              jlo = ilo
              jhi = ihi
           end if
           do i = ilo + 1, ihi
              if( aimag( h( i, i-1 ) )/=zero ) then
                 ! ==== the following redundant normalization
                 ! .    avoids problems with both gradual and
                 ! .    sudden underflow in abs(h(i,i-1)) ====
                 sc = h( i, i-1 ) / cabs1( h( i, i-1 ) )
                 sc = conjg( sc ) / abs( sc )
                 h( i, i-1 ) = abs( h( i, i-1 ) )
                 call stdlib${ii}$_zscal( jhi-i+1, sc, h( i, i ), ldh )
                 call stdlib${ii}$_zscal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1_${ik}$ )
                 if( wantz )call stdlib${ii}$_zscal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ )
              end if
           end do
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=dp) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo, or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           30 continue
           if( i<ilo )go to 150
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           l = ilo
           loop_130: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50
                 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) )
                 if( tst==czero ) then
                    if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=dp) )
                    if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=dp) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some examples.  ====
                 if( abs( real( h( k, k-1 ),KIND=dp) )<=ulp*tst ) then
                    ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50
                 end if
              end do
              50 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = czero
              end if
              ! exit from loop if a submatrix of order 1 has split off.
              if( l>=i )go to 140
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( i, i-1 ),KIND=dp) )
                 t = s + h( i, i )
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( l+1, l ),KIND=dp) )
                 t = s + h( l, l )
              else
                 ! wilkinson's shift.
                 t = h( i, i )
                 u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) )
                 s = cabs1( u )
                 if( s/=zero ) then
                    x = half*( h( i-1, i-1 )-t )
                    sx = cabs1( x )
                    s = max( s, cabs1( x ) )
                    y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ )
                    if( sx>zero ) then
                       if( real( x / sx,KIND=dp)*real( y,KIND=dp)+aimag( x / sx )*aimag( y )&
                                 <zero )y = -y
                    end if
                    t = t - u*stdlib${ii}$_zladiv( u, ( x+y ) )
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 1, l + 1, -1
                 ! determine the effect of starting the single-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.
                 h11 = h( m, m )
                 h22 = h( m+1, m+1 )
                 h11s = h11 - t
                 h21 = real( h( m+1, m ),KIND=dp)
                 s = cabs1( h11s ) + abs( h21 )
                 h11s = h11s / s
                 h21 = h21 / s
                 v( 1_${ik}$ ) = h11s
                 v( 2_${ik}$ ) = h21
                 h10 = real( h( m, m-1 ),KIND=dp)
                 if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )&
                           go to 70
              end do
              h11 = h( l, l )
              h22 = h( l+1, l+1 )
              h11s = h11 - t
              h21 = real( h( l+1, l ),KIND=dp)
              s = cabs1( h11s ) + abs( h21 )
              h11s = h11s / s
              h21 = h21 / s
              v( 1_${ik}$ ) = h11s
              v( 2_${ik}$ ) = h21
              70 continue
              ! single-shift qr step
              loop_120: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge cone step toward the bottom of the active
                 ! submatrix.
                 ! v(2) is always real before the call to stdlib${ii}$_zlarfg, and hence
                 ! after the call t2 ( = t1*v(2) ) is also real.
                 if( k>m )call stdlib${ii}$_zcopy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_zlarfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = czero
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = real( t1*v2,KIND=dp)
                 ! apply g from the left to transform the rows of the matrix
                 ! in columns k to i2.
                 do j = k, i2
                    sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j )
                    h( k, j ) = h( k, j ) - sum
                    h( k+1, j ) = h( k+1, j ) - sum*v2
                 end do
                 ! apply g from the right to transform the columns of the
                 ! matrix in rows i1 to min(k+2,i).
                 do j = i1, min( k+2, i )
                    sum = t1*h( j, k ) + t2*h( j, k+1 )
                    h( j, k ) = h( j, k ) - sum
                    h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 )
                 end do
                 if( wantz ) then
                    ! accumulate transformations in the matrix z
                    do j = iloz, ihiz
                       sum = t1*z( j, k ) + t2*z( j, k+1 )
                       z( j, k ) = z( j, k ) - sum
                       z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 )
                    end do
                 end if
                 if( k==m .and. m>l ) then
                    ! if the qr step was started at row m > l because two
                    ! consecutive small subdiagonals were found, then extra
                    ! scaling must be performed to ensure that h(m,m-1) remains
                    ! real.
                    temp = cone - t1
                    temp = temp / abs( temp )
                    h( m+1, m ) = h( m+1, m )*conjg( temp )
                    if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp
                    do j = m, i
                       if( j/=m+1 ) then
                          if( i2>j )call stdlib${ii}$_zscal( i2-j, temp, h( j, j+1 ), ldh )
                          call stdlib${ii}$_zscal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ )
                          if( wantz ) then
                             call stdlib${ii}$_zscal( nz, conjg( temp ), z( iloz, j ),1_${ik}$ )
                          end if
                       end if
                    end do
                 end if
              end do loop_120
              ! ensure that h(i,i-1) is real.
              temp = h( i, i-1 )
              if( aimag( temp )/=zero ) then
                 rtemp = abs( temp )
                 h( i, i-1 ) = rtemp
                 temp = temp / rtemp
                 if( i2>i )call stdlib${ii}$_zscal( i2-i, conjg( temp ), h( i, i+1 ), ldh )
                 call stdlib${ii}$_zscal( i-i1, temp, h( i1, i ), 1_${ik}$ )
                 if( wantz ) then
                    call stdlib${ii}$_zscal( nz, temp, z( iloz, i ), 1_${ik}$ )
                 end if
              end if
           end do loop_130
           ! failure to converge in remaining number of iterations
           info = i
           return
           140 continue
           ! h(i,i-1) is negligible: cone eigenvalue has converged.
           w( i ) = h( i, i )
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 30
           150 continue
           return
     end subroutine stdlib${ii}$_zlahqr

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, info &
     !! ZLAHQR: is an auxiliary routine called by CHSEQR to update the
     !! eigenvalues and Schur decomposition already computed by CHSEQR, by
     !! dealing with the Hessenberg submatrix in rows and columns ILO to
     !! IHI.
               )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(${ck}$), intent(out) :: w(*)
        ! =========================================================
           ! Parameters 
           real(${ck}$), parameter :: dat1 = 3.0_${ck}$/4.0_${ck}$
           integer(${ik}$), parameter :: kexsh = 10_${ik}$
           
           ! Local Scalars 
           complex(${ck}$) :: cdum, h11, h11s, h22, sc, sum, t, t1, temp, u, v2, x, y
           real(${ck}$) :: aa, ab, ba, bb, h10, h21, rtemp, s, safmax, safmin, smlnum, sx, t2, tst, &
                     ulp
           integer(${ik}$) :: i, i1, i2, its, itmax, j, jhi, jlo, k, l, m, nh, nz, kdefl
           ! Local Arrays 
           complex(${ck}$) :: v(2_${ik}$)
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Intrinsic Functions 
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n==0 )return
           if( ilo==ihi ) then
              w( ilo ) = h( ilo, ilo )
              return
           end if
           ! ==== clear out the trash ====
           do j = ilo, ihi - 3
              h( j+2, j ) = czero
              h( j+3, j ) = czero
           end do
           if( ilo<=ihi-2 )h( ihi, ihi-2 ) = czero
           ! ==== ensure that subdiagonal entries are real ====
           if( wantt ) then
              jlo = 1_${ik}$
              jhi = n
           else
              jlo = ilo
              jhi = ihi
           end if
           do i = ilo + 1, ihi
              if( aimag( h( i, i-1 ) )/=zero ) then
                 ! ==== the following redundant normalization
                 ! .    avoids problems with both gradual and
                 ! .    sudden underflow in abs(h(i,i-1)) ====
                 sc = h( i, i-1 ) / cabs1( h( i, i-1 ) )
                 sc = conjg( sc ) / abs( sc )
                 h( i, i-1 ) = abs( h( i, i-1 ) )
                 call stdlib${ii}$_${ci}$scal( jhi-i+1, sc, h( i, i ), ldh )
                 call stdlib${ii}$_${ci}$scal( min( jhi, i+1 )-jlo+1, conjg( sc ),h( jlo, i ), 1_${ik}$ )
                 if( wantz )call stdlib${ii}$_${ci}$scal( ihiz-iloz+1, conjg( sc ), z( iloz, i ), 1_${ik}$ )
              end if
           end do
           nh = ihi - ilo + 1_${ik}$
           nz = ihiz - iloz + 1_${ik}$
           ! set machine-dependent constants for the stopping criterion.
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( nh,KIND=${ck}$) / ulp )
           ! i1 and i2 are the indices of the first row and last column of h
           ! to which transformations must be applied. if eigenvalues only are
           ! being computed, i1 and i2 are set inside the main loop.
           if( wantt ) then
              i1 = 1_${ik}$
              i2 = n
           end if
           ! itmax is the total number of qr iterations allowed.
           itmax = 30_${ik}$ * max( 10_${ik}$, nh )
           ! kdefl counts the number of iterations since a deflation
           kdefl = 0_${ik}$
           ! the main loop begins here. i is the loop index and decreases from
           ! ihi to ilo in steps of 1. each iteration of the loop works
           ! with the active submatrix in rows and columns l to i.
           ! eigenvalues i+1 to ihi have already converged. either l = ilo, or
           ! h(l,l-1) is negligible so that the matrix splits.
           i = ihi
           30 continue
           if( i<ilo )go to 150
           ! perform qr iterations on rows and columns ilo to i until a
           ! submatrix of order 1 splits off at the bottom because a
           ! subdiagonal element has become negligible.
           l = ilo
           loop_130: do its = 0, itmax
              ! look for a single small subdiagonal element.
              do k = i, l + 1, -1
                 if( cabs1( h( k, k-1 ) )<=smlnum )go to 50
                 tst = cabs1( h( k-1, k-1 ) ) + cabs1( h( k, k ) )
                 if( tst==czero ) then
                    if( k-2>=ilo )tst = tst + abs( real( h( k-1, k-2 ),KIND=${ck}$) )
                    if( k+1<=ihi )tst = tst + abs( real( h( k+1, k ),KIND=${ck}$) )
                 end if
                 ! ==== the following is a conservative small subdiagonal
                 ! .    deflation criterion due to ahues
                 ! .    1997). it has better mathematical foundation and
                 ! .    improves accuracy in some examples.  ====
                 if( abs( real( h( k, k-1 ),KIND=${ck}$) )<=ulp*tst ) then
                    ab = max( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    ba = min( cabs1( h( k, k-1 ) ), cabs1( h( k-1, k ) ) )
                    aa = max( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    bb = min( cabs1( h( k, k ) ),cabs1( h( k-1, k-1 )-h( k, k ) ) )
                    s = aa + ab
                    if( ba*( ab / s )<=max( smlnum,ulp*( bb*( aa / s ) ) ) )go to 50
                 end if
              end do
              50 continue
              l = k
              if( l>ilo ) then
                 ! h(l,l-1) is negligible
                 h( l, l-1 ) = czero
              end if
              ! exit from loop if a submatrix of order 1 has split off.
              if( l>=i )go to 140
              kdefl = kdefl + 1_${ik}$
              ! now the active submatrix is in rows and columns l to i. if
              ! eigenvalues only are being computed, only the active submatrix
              ! need be transformed.
              if( .not.wantt ) then
                 i1 = l
                 i2 = i
              end if
              if( mod(kdefl,2_${ik}$*kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( i, i-1 ),KIND=${ck}$) )
                 t = s + h( i, i )
              else if( mod(kdefl,kexsh)==0_${ik}$ ) then
                 ! exceptional shift.
                 s = dat1*abs( real( h( l+1, l ),KIND=${ck}$) )
                 t = s + h( l, l )
              else
                 ! wilkinson's shift.
                 t = h( i, i )
                 u = sqrt( h( i-1, i ) )*sqrt( h( i, i-1 ) )
                 s = cabs1( u )
                 if( s/=zero ) then
                    x = half*( h( i-1, i-1 )-t )
                    sx = cabs1( x )
                    s = max( s, cabs1( x ) )
                    y = s*sqrt( ( x / s )**2_${ik}$+( u / s )**2_${ik}$ )
                    if( sx>zero ) then
                       if( real( x / sx,KIND=${ck}$)*real( y,KIND=${ck}$)+aimag( x / sx )*aimag( y )&
                                 <zero )y = -y
                    end if
                    t = t - u*stdlib${ii}$_${ci}$ladiv( u, ( x+y ) )
                 end if
              end if
              ! look for two consecutive small subdiagonal elements.
              do m = i - 1, l + 1, -1
                 ! determine the effect of starting the single-shift qr
                 ! iteration at row m, and see if this would make h(m,m-1)
                 ! negligible.
                 h11 = h( m, m )
                 h22 = h( m+1, m+1 )
                 h11s = h11 - t
                 h21 = real( h( m+1, m ),KIND=${ck}$)
                 s = cabs1( h11s ) + abs( h21 )
                 h11s = h11s / s
                 h21 = h21 / s
                 v( 1_${ik}$ ) = h11s
                 v( 2_${ik}$ ) = h21
                 h10 = real( h( m, m-1 ),KIND=${ck}$)
                 if( abs( h10 )*abs( h21 )<=ulp*( cabs1( h11s )*( cabs1( h11 )+cabs1( h22 ) ) ) )&
                           go to 70
              end do
              h11 = h( l, l )
              h22 = h( l+1, l+1 )
              h11s = h11 - t
              h21 = real( h( l+1, l ),KIND=${ck}$)
              s = cabs1( h11s ) + abs( h21 )
              h11s = h11s / s
              h21 = h21 / s
              v( 1_${ik}$ ) = h11s
              v( 2_${ik}$ ) = h21
              70 continue
              ! single-shift qr step
              loop_120: do k = m, i - 1
                 ! the first iteration of this loop determines a reflection g
                 ! from the vector v and applies it from left and right to h,
                 ! thus creating a nonzero bulge below the subdiagonal.
                 ! each subsequent iteration determines a reflection g to
                 ! restore the hessenberg form in the (k-1)th column, and thus
                 ! chases the bulge cone step toward the bottom of the active
                 ! submatrix.
                 ! v(2) is always real before the call to stdlib${ii}$_${ci}$larfg, and hence
                 ! after the call t2 ( = t1*v(2) ) is also real.
                 if( k>m )call stdlib${ii}$_${ci}$copy( 2_${ik}$, h( k, k-1 ), 1_${ik}$, v, 1_${ik}$ )
                 call stdlib${ii}$_${ci}$larfg( 2_${ik}$, v( 1_${ik}$ ), v( 2_${ik}$ ), 1_${ik}$, t1 )
                 if( k>m ) then
                    h( k, k-1 ) = v( 1_${ik}$ )
                    h( k+1, k-1 ) = czero
                 end if
                 v2 = v( 2_${ik}$ )
                 t2 = real( t1*v2,KIND=${ck}$)
                 ! apply g from the left to transform the rows of the matrix
                 ! in columns k to i2.
                 do j = k, i2
                    sum = conjg( t1 )*h( k, j ) + t2*h( k+1, j )
                    h( k, j ) = h( k, j ) - sum
                    h( k+1, j ) = h( k+1, j ) - sum*v2
                 end do
                 ! apply g from the right to transform the columns of the
                 ! matrix in rows i1 to min(k+2,i).
                 do j = i1, min( k+2, i )
                    sum = t1*h( j, k ) + t2*h( j, k+1 )
                    h( j, k ) = h( j, k ) - sum
                    h( j, k+1 ) = h( j, k+1 ) - sum*conjg( v2 )
                 end do
                 if( wantz ) then
                    ! accumulate transformations in the matrix z
                    do j = iloz, ihiz
                       sum = t1*z( j, k ) + t2*z( j, k+1 )
                       z( j, k ) = z( j, k ) - sum
                       z( j, k+1 ) = z( j, k+1 ) - sum*conjg( v2 )
                    end do
                 end if
                 if( k==m .and. m>l ) then
                    ! if the qr step was started at row m > l because two
                    ! consecutive small subdiagonals were found, then extra
                    ! scaling must be performed to ensure that h(m,m-1) remains
                    ! real.
                    temp = cone - t1
                    temp = temp / abs( temp )
                    h( m+1, m ) = h( m+1, m )*conjg( temp )
                    if( m+2<=i )h( m+2, m+1 ) = h( m+2, m+1 )*temp
                    do j = m, i
                       if( j/=m+1 ) then
                          if( i2>j )call stdlib${ii}$_${ci}$scal( i2-j, temp, h( j, j+1 ), ldh )
                          call stdlib${ii}$_${ci}$scal( j-i1, conjg( temp ), h( i1, j ), 1_${ik}$ )
                          if( wantz ) then
                             call stdlib${ii}$_${ci}$scal( nz, conjg( temp ), z( iloz, j ),1_${ik}$ )
                          end if
                       end if
                    end do
                 end if
              end do loop_120
              ! ensure that h(i,i-1) is real.
              temp = h( i, i-1 )
              if( aimag( temp )/=zero ) then
                 rtemp = abs( temp )
                 h( i, i-1 ) = rtemp
                 temp = temp / rtemp
                 if( i2>i )call stdlib${ii}$_${ci}$scal( i2-i, conjg( temp ), h( i, i+1 ), ldh )
                 call stdlib${ii}$_${ci}$scal( i-i1, temp, h( i1, i ), 1_${ik}$ )
                 if( wantz ) then
                    call stdlib${ii}$_${ci}$scal( nz, temp, z( iloz, i ), 1_${ik}$ )
                 end if
              end if
           end do loop_130
           ! failure to converge in remaining number of iterations
           info = i
           return
           140 continue
           ! h(i,i-1) is negligible: cone eigenvalue has converged.
           w( i ) = h( i, i )
           ! reset deflation counter
           kdefl = 0_${ik}$
           ! return to start of the main loop with new value of i.
           i = l - 1_${ik}$
           go to 30
           150 continue
           return
     end subroutine stdlib${ii}$_${ci}$lahqr

#:endif
#:endfor



     module subroutine stdlib${ii}$_slaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! SLAQR0 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(sp), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(sp), parameter :: wilk1 = 0.75_sp
           real(sp), parameter :: wilk2 = -0.4375_sp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_slahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(sp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_slahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_slaqr3 ====
              call stdlib${ii}$_slaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_slaqr5, stdlib${ii}$_slaqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
                 return
              end if
              ! ==== stdlib${ii}$_slahqr/stdlib${ii}$_slaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'SLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_slaqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_slaqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_slaqr4 or
                       ! .    stdlib${ii}$_slahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_slaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf )
                          else
                             call stdlib${ii}$_slahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
     end subroutine stdlib${ii}$_slaqr0

     module subroutine stdlib${ii}$_dlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! DLAQR0 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(dp), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(dp), parameter :: wilk1 = 0.75_dp
           real(dp), parameter :: wilk2 = -0.4375_dp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_dlahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(dp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_dlahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_dlaqr3 ====
              call stdlib${ii}$_dlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_dlaqr5, stdlib${ii}$_dlaqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
                 return
              end if
              ! ==== stdlib${ii}$_dlahqr/stdlib${ii}$_dlaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_dlaqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_dlaqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_dlaqr4 or
                       ! .    stdlib${ii}$_dlahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_dlaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf )
                          else
                             call stdlib${ii}$_dlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
     end subroutine stdlib${ii}$_dlaqr0

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$laqr0( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! DLAQR0: computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*)
           real(${rk}$), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(${rk}$), parameter :: wilk1 = 0.75_${rk}$
           real(${rk}$), parameter :: wilk2 = -0.4375_${rk}$
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_${ri}$lahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(${rk}$) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(${rk}$) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_${ri}$lahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_${ri}$laqr3 ====
              call stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_${ri}$laqr5, stdlib${ii}$_${ri}$laqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
                 return
              end if
              ! ==== stdlib${ii}$_${ri}$lahqr/stdlib${ii}$_${ri}$laqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_${ri}$laqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_${ri}$laqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_${ri}$laqr4 or
                       ! .    stdlib${ii}$_${ri}$lahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_${ri}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_${ri}$laqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, work,lwork, inf )
                          else
                             call stdlib${ii}$_${ri}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( &
                                       ks ),wi( ks ), 1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
     end subroutine stdlib${ii}$_${ri}$laqr0

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! CLAQR0 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(sp), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(sp), parameter :: wilk1 = 0.75_sp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_clahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(sp) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(sp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_clahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_claqr3 ====
              call stdlib${ii}$_claqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_claqr5, stdlib${ii}$_claqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
                 return
              end if
              ! ==== stdlib${ii}$_clahqr/stdlib${ii}$_claqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'CLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_claqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_claqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_claqr4 or
                       ! .    stdlib${ii}$_clahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_claqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf )
                          else
                             call stdlib${ii}$_clahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
     end subroutine stdlib${ii}$_claqr0

     pure module subroutine stdlib${ii}$_zlaqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! ZLAQR0 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(dp), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(dp), parameter :: wilk1 = 0.75_dp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_zlahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(dp) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(dp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_zlahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_zlaqr3 ====
              call stdlib${ii}$_zlaqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_zlaqr5, stdlib${ii}$_zlaqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
                 return
              end if
              ! ==== stdlib${ii}$_zlahqr/stdlib${ii}$_zlaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_zlaqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_zlaqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_zlaqr4 or
                       ! .    stdlib${ii}$_zlahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_zlaqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf )
                          else
                             call stdlib${ii}$_zlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
     end subroutine stdlib${ii}$_zlaqr0

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr0( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! ZLAQR0: computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(${ck}$), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(${ck}$), parameter :: wilk1 = 0.75_${ck}$
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_${ci}$lahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(${ck}$) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(${ck}$) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(${ck}$) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_${ci}$lahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_${ci}$laqr3 ====
              call stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_${ci}$laqr5, stdlib${ii}$_${ci}$laqr3) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
                 return
              end if
              ! ==== stdlib${ii}$_${ci}$lahqr/stdlib${ii}$_${ci}$laqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR0', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_${ci}$laqr3
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_${ci}$laqr3 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_${ci}$laqr4 or
                       ! .    stdlib${ii}$_${ci}$lahqr on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_${ci}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          if( ns>nmin ) then
                             call stdlib${ii}$_${ci}$laqr4( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, work, lwork, inf )
                          else
                             call stdlib${ii}$_${ci}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( &
                                       ks ), 1_${ik}$, 1_${ik}$,zdum, 1_${ik}$, inf )
                          end if
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
     end subroutine stdlib${ii}$_${ci}$laqr0

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqr1( n, h, ldh, sr1, si1, sr2, si2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, SLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)
     !! scaling to avoid overflows and most underflows. It
     !! is assumed that either
     !! 1) sr1 = sr2 and si1 = -si2
     !! or
     !! 2) si1 = si2 = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(sp), intent(in) :: si1, si2, sr1, sr2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           real(sp), intent(in) :: h(ldh,*)
           real(sp), intent(out) :: v(*)
        ! ================================================================
           
           ! Local Scalars 
           real(sp) :: h21s, h31s, s
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( &
                           si2 / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 )
              end if
           else
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
                 v( 3_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )&
                           *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_slaqr1

     pure module subroutine stdlib${ii}$_dlaqr1( n, h, ldh, sr1, si1, sr2, si2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)
     !! scaling to avoid overflows and most underflows. It
     !! is assumed that either
     !! 1) sr1 = sr2 and si1 = -si2
     !! or
     !! 2) si1 = si2 = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(dp), intent(in) :: si1, si2, sr1, sr2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           real(dp), intent(in) :: h(ldh,*)
           real(dp), intent(out) :: v(*)
        ! ================================================================
           
           ! Local Scalars 
           real(dp) :: h21s, h31s, s
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( &
                           si2 / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 )
              end if
           else
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
                 v( 3_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )&
                           *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_dlaqr1

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqr1( n, h, ldh, sr1, si1, sr2, si2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, DLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I)
     !! scaling to avoid overflows and most underflows. It
     !! is assumed that either
     !! 1) sr1 = sr2 and si1 = -si2
     !! or
     !! 2) si1 = si2 = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           real(${rk}$), intent(in) :: si1, si2, sr1, sr2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           real(${rk}$), intent(in) :: h(ldh,*)
           real(${rk}$), intent(out) :: v(*)
        ! ================================================================
           
           ! Local Scalars 
           real(${rk}$) :: h21s, h31s, s
           ! Intrinsic Functions 
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) - si1*( &
                           si2 / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 )
              end if
           else
              s = abs( h( 1_${ik}$, 1_${ik}$ )-sr2 ) + abs( si2 ) + abs( h( 2_${ik}$, 1_${ik}$ ) ) +abs( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = zero
                 v( 2_${ik}$ ) = zero
                 v( 3_${ik}$ ) = zero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-sr1 )*( ( h( 1_${ik}$, 1_${ik}$ )-sr2 ) / s ) -si1*( si2 / s ) + h( 1_${ik}$, 2_${ik}$ )&
                           *h21s + h( 1_${ik}$, 3_${ik}$ )*h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-sr1-sr2 ) +h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-sr1-sr2 ) +h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_${ri}$laqr1

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr1( n, h, ldh, s1, s2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, CLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - s1*I)*(H - s2*I)
     !! scaling to avoid overflows and most underflows.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(sp), intent(in) :: s1, s2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           complex(sp), intent(in) :: h(ldh,*)
           complex(sp), intent(out) :: v(*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(sp) :: cdum, h21s, h31s
           real(sp) :: s
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 )
              end if
           else
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==czero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
                 v( 3_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )&
                           *h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_claqr1

     pure module subroutine stdlib${ii}$_zlaqr1( n, h, ldh, s1, s2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - s1*I)*(H - s2*I)
     !! scaling to avoid overflows and most underflows.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(dp), intent(in) :: s1, s2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           complex(dp), intent(in) :: h(ldh,*)
           complex(dp), intent(out) :: v(*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(dp) :: cdum, h21s, h31s
           real(dp) :: s
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 )
              end if
           else
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==czero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
                 v( 3_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )&
                           *h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_zlaqr1

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr1( n, h, ldh, s1, s2, v )
     !! Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (H - s1*I)*(H - s2*I)
     !! scaling to avoid overflows and most underflows.
     !! This is useful for starting double implicit shift bulges
     !! in the QR algorithm.
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           complex(${ck}$), intent(in) :: s1, s2
           integer(${ik}$), intent(in) :: ldh, n
           ! Array Arguments 
           complex(${ck}$), intent(in) :: h(ldh,*)
           complex(${ck}$), intent(out) :: v(*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(${ck}$) :: cdum, h21s, h31s
           real(${ck}$) :: s
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! quick return if possible
           if( n/=2_${ik}$ .and. n/=3_${ik}$ ) then
              return
           end if
           if( n==2_${ik}$ ) then
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) )
              if( s==zero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = h21s*h( 1_${ik}$, 2_${ik}$ ) + ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s )
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 )
              end if
           else
              s = cabs1( h( 1_${ik}$, 1_${ik}$ )-s2 ) + cabs1( h( 2_${ik}$, 1_${ik}$ ) ) +cabs1( h( 3_${ik}$, 1_${ik}$ ) )
              if( s==czero ) then
                 v( 1_${ik}$ ) = czero
                 v( 2_${ik}$ ) = czero
                 v( 3_${ik}$ ) = czero
              else
                 h21s = h( 2_${ik}$, 1_${ik}$ ) / s
                 h31s = h( 3_${ik}$, 1_${ik}$ ) / s
                 v( 1_${ik}$ ) = ( h( 1_${ik}$, 1_${ik}$ )-s1 )*( ( h( 1_${ik}$, 1_${ik}$ )-s2 ) / s ) +h( 1_${ik}$, 2_${ik}$ )*h21s + h( 1_${ik}$, 3_${ik}$ )&
                           *h31s
                 v( 2_${ik}$ ) = h21s*( h( 1_${ik}$, 1_${ik}$ )+h( 2_${ik}$, 2_${ik}$ )-s1-s2 ) + h( 2_${ik}$, 3_${ik}$ )*h31s
                 v( 3_${ik}$ ) = h31s*( h( 1_${ik}$, 1_${ik}$ )+h( 3_${ik}$, 3_${ik}$ )-s1-s2 ) + h21s*h( 3_${ik}$, 2_${ik}$ )
              end if
           end if
     end subroutine stdlib${ii}$_${ci}$laqr1

#:endif
#:endfor



     module subroutine stdlib${ii}$_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! SLAQR2 is identical to SLAQR3 except that it avoids
     !! recursion by calling SLAHQR instead of SLAQR4.
     !! Aggressive early deflation:
     !! This subroutine accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwkopt
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_sgehrd ====
              call stdlib${ii}$_sgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_sormhr ====
              call stdlib${ii}$_sormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_slaset( 'A', jw, jw, zero, one, v, ldv )
           call stdlib${ii}$_slahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, &
                     v, ldv, infqr )
           ! ==== stdlib${ii}$_strexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not.bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_strexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_strexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_scopy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_slarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_slaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_slarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_slarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_slarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_sgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_scopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_sormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
     end subroutine stdlib${ii}$_slaqr2

     module subroutine stdlib${ii}$_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! DLAQR2 is identical to DLAQR3 except that it avoids
     !! recursion by calling DLAHQR instead of DLAQR4.
     !! Aggressive early deflation:
     !! This subroutine accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwkopt
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_dgehrd ====
              call stdlib${ii}$_dgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_dormhr ====
              call stdlib${ii}$_dormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_dlaset( 'A', jw, jw, zero, one, v, ldv )
           call stdlib${ii}$_dlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, &
                     v, ldv, infqr )
           ! ==== stdlib${ii}$_dtrexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not.bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_dtrexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_dtrexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_dcopy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_dlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_dlarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_dlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_dlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_dgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_dcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_dormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
     end subroutine stdlib${ii}$_dlaqr2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! DLAQR2: is identical to DLAQR3 except that it avoids
     !! recursion by calling DLAHQR instead of DLAQR4.
     !! Aggressive early deflation:
     !! This subroutine accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*)
           real(${rk}$), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(${rk}$) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwkopt
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_${ri}$gehrd ====
              call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ri}$ormhr ====
              call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${rk}$) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_${ri}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_${ri}$laset( 'A', jw, jw, zero, one, v, ldv )
           call stdlib${ii}$_${ri}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, jw, &
                     v, ldv, infqr )
           ! ==== stdlib${ii}$_${ri}$trexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not.bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_${ri}$trexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_${ri}$trexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_${ri}$copy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_${ri}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_${ri}$laset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_${ri}$larf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_${ri}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_${ri}$gemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_${ri}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
     end subroutine stdlib${ii}$_${ri}$laqr2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! CLAQR2 is identical to CLAQR3 except that it avoids
     !! recursion by calling CLAHQR instead of CLAQR4.
     !! Aggressive early deflation:
     !! This subroutine accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(sp) :: beta, cdum, s, tau
           real(sp) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwkopt
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_cgehrd ====
              call stdlib${ii}$_cgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_cunmhr ====
              call stdlib${ii}$_cunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_claset( 'A', jw, jw, czero, cone, v, ldv )
           call stdlib${ii}$_clahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                     infqr )
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_ctrexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_ccopy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_clarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_claset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_clarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_clarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_clarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_cgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_ccopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_cunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
     end subroutine stdlib${ii}$_claqr2

     pure module subroutine stdlib${ii}$_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! ZLAQR2 is identical to ZLAQR3 except that it avoids
     !! recursion by calling ZLAHQR instead of ZLAQR4.
     !! Aggressive early deflation:
     !! ZLAQR2 accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(dp) :: beta, cdum, s, tau
           real(dp) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwkopt
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_zgehrd ====
              call stdlib${ii}$_zgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_zunmhr ====
              call stdlib${ii}$_zunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_zlaset( 'A', jw, jw, czero, cone, v, ldv )
           call stdlib${ii}$_zlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                     infqr )
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_ztrexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_zcopy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_zlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_zlarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_zlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_zlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_zgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_zcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_zunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
     end subroutine stdlib${ii}$_zlaqr2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! ZLAQR2: is identical to ZLAQR3 except that it avoids
     !! recursion by calling ZLAHQR instead of ZLAQR4.
     !! Aggressive early deflation:
     !! ZLAQR2 accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(${ck}$), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(${ck}$) :: beta, cdum, s, tau
           real(${ck}$) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwkopt
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_${ci}$gehrd ====
              call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ci}$unmhr ====
              call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = jw + max( lwk1, lwk2 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${ck}$) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_${ci}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_${ci}$laset( 'A', jw, jw, czero, cone, v, ldv )
           call stdlib${ii}$_${ci}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                     infqr )
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_${ci}$trexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_${ci}$copy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_${ci}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_${ci}$laset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_${ci}$larf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_${ci}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ci}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_${ci}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_${ci}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
     end subroutine stdlib${ii}$_${ci}$laqr2

#:endif
#:endfor



     module subroutine stdlib${ii}$_slaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! Aggressive early deflation:
     !! SLAQR3 accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(sp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(sp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwk3, lwkopt, nmin
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_sgehrd ====
              call stdlib${ii}$_sgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_sormhr ====
              call stdlib${ii}$_sormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_slaqr4 ====
              call stdlib${ii}$_slaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -&
                        1_${ik}$, infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_slacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_scopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_slaset( 'A', jw, jw, zero, one, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_slaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, work, lwork, infqr )
           else
              call stdlib${ii}$_slahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, infqr )
           end if
           ! ==== stdlib${ii}$_strexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not. bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_strexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_strexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_strexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_slanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_scopy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_slarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_slaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_slarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_slarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_slarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_sgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_slacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_scopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_sormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_sgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_slacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_sgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_slacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
     end subroutine stdlib${ii}$_slaqr3

     module subroutine stdlib${ii}$_dlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! Aggressive early deflation:
     !! DLAQR3 accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(dp), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(dp) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwk3, lwkopt, nmin
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_dgehrd ====
              call stdlib${ii}$_dgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_dormhr ====
              call stdlib${ii}$_dormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_dlaqr4 ====
              call stdlib${ii}$_dlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -&
                        1_${ik}$, infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_dlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_dcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_dlaset( 'A', jw, jw, zero, one, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_dlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, work, lwork, infqr )
           else
              call stdlib${ii}$_dlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, infqr )
           end if
           ! ==== stdlib${ii}$_dtrexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not. bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_dtrexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_dtrexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_dtrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_dlanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_dcopy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_dlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_dlaset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_dlarf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_dlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_dlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_dgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_dlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_dcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_dormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_dgemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_dlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_dgemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_dlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
     end subroutine stdlib${ii}$_dlaqr3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, ns, nd,&
     !! Aggressive early deflation:
     !! DLAQR3: accepts as input an upper Hessenberg matrix
     !! H and performs an orthogonal similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an orthogonal similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
                sr, si, v, ldv, nh, t,ldt, nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*)
           real(${rk}$), intent(out) :: si(*), sr(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(${rk}$) :: aa, bb, beta, cc, cs, dd, evi, evk, foo, s, safmax, safmin, smlnum, sn, &
                     tau, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, k, kcol, kend, kln, krow, kwtop, &
                     ltop, lwk1, lwk2, lwk3, lwkopt, nmin
           logical(lk) :: bulge, sorted
           ! Intrinsic Functions 
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_${ri}$gehrd ====
              call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ri}$ormhr ====
              call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ri}$laqr4 ====
              call stdlib${ii}$_${ri}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr, si, 1_${ik}$, jw,v, ldv, work, -&
                        1_${ik}$, infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = one
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${rk}$) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = zero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sr( kwtop ) = h( kwtop, kwtop )
              si( kwtop ) = zero
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( abs( s )<=max( smlnum, ulp*abs( h( kwtop, kwtop ) ) ) )then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = zero
              end if
              work( 1_${ik}$ ) = one
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_${ri}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_${ri}$laset( 'A', jw, jw, zero, one, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_${ri}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, work, lwork, infqr )
           else
              call stdlib${ii}$_${ri}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sr( kwtop ),si( kwtop ), 1_${ik}$, &
                        jw, v, ldv, infqr )
           end if
           ! ==== stdlib${ii}$_${ri}$trexc needs a clean margin near the diagonal ====
           do j = 1, jw - 3
              t( j+2, j ) = zero
              t( j+3, j ) = zero
           end do
           if( jw>2_${ik}$ )t( jw, jw-2 ) = zero
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           20 continue
           if( ilst<=ns ) then
              if( ns==1_${ik}$ ) then
                 bulge = .false.
              else
                 bulge = t( ns, ns-1 )/=zero
              end if
              ! ==== small spike tip test for deflation ====
              if( .not. bulge ) then
                 ! ==== real eigenvalue ====
                 foo = abs( t( ns, ns ) )
                 if( foo==zero )foo = abs( s )
                 if( abs( s*v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) ) then
                    ! ==== deflatable ====
                    ns = ns - 1_${ik}$
                 else
                    ! ==== undeflatable.   move it up out of the way.
                    ! .    (stdlib${ii}$_${ri}$trexc can not fail in this case.) ====
                    ifst = ns
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 1_${ik}$
                 end if
              else
                 ! ==== complex conjugate pair ====
                 foo = abs( t( ns, ns ) ) + sqrt( abs( t( ns, ns-1 ) ) )*sqrt( abs( t( ns-1, ns ) &
                           ) )
                 if( foo==zero )foo = abs( s )
                 if( max( abs( s*v( 1_${ik}$, ns ) ), abs( s*v( 1_${ik}$, ns-1 ) ) )<=max( smlnum, ulp*foo ) ) &
                           then
                    ! ==== deflatable ====
                    ns = ns - 2_${ik}$
                 else
                    ! ==== undeflatable. move them up out of the way.
                    ! .    fortunately, stdlib${ii}$_${ri}$trexc does the right thing with
                    ! .    ilst in case of a rare exchange failure. ====
                    ifst = ns
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    ilst = ilst + 2_${ik}$
                 end if
              end if
              ! ==== end deflation detection loop ====
              go to 20
           end if
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = zero
           if( ns<jw ) then
              ! ==== sorting diagonal blocks of t improves accuracy for
              ! .    graded matrices.  bubble sort deals well with
              ! .    exchange failures. ====
              sorted = .false.
              i = ns + 1_${ik}$
              30 continue
              if( sorted )go to 50
              sorted = .true.
              kend = i - 1_${ik}$
              i = infqr + 1_${ik}$
              if( i==ns ) then
                 k = i + 1_${ik}$
              else if( t( i+1, i )==zero ) then
                 k = i + 1_${ik}$
              else
                 k = i + 2_${ik}$
              end if
              40 continue
              if( k<=kend ) then
                 if( k==i+1 ) then
                    evi = abs( t( i, i ) )
                 else
                    evi = abs( t( i, i ) ) + sqrt( abs( t( i+1, i ) ) )*sqrt( abs( t( i, i+1 ) ) )
                              
                 end if
                 if( k==kend ) then
                    evk = abs( t( k, k ) )
                 else if( t( k+1, k )==zero ) then
                    evk = abs( t( k, k ) )
                 else
                    evk = abs( t( k, k ) ) + sqrt( abs( t( k+1, k ) ) )*sqrt( abs( t( k, k+1 ) ) )
                              
                 end if
                 if( evi>=evk ) then
                    i = k
                 else
                    sorted = .false.
                    ifst = i
                    ilst = k
                    call stdlib${ii}$_${ri}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, work,info )
                    if( info==0_${ik}$ ) then
                       i = ilst
                    else
                       i = k
                    end if
                 end if
                 if( i==kend ) then
                    k = i + 1_${ik}$
                 else if( t( i+1, i )==zero ) then
                    k = i + 1_${ik}$
                 else
                    k = i + 2_${ik}$
                 end if
                 go to 40
              end if
              go to 30
              50 continue
           end if
           ! ==== restore shift/eigenvalue array from t ====
           i = jw
           60 continue
           if( i>=infqr+1 ) then
              if( i==infqr+1 ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else if( t( i, i-1 )==zero ) then
                 sr( kwtop+i-1 ) = t( i, i )
                 si( kwtop+i-1 ) = zero
                 i = i - 1_${ik}$
              else
                 aa = t( i-1, i-1 )
                 cc = t( i, i-1 )
                 bb = t( i-1, i )
                 dd = t( i, i )
                 call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, sr( kwtop+i-2 ),si( kwtop+i-2 ), sr( kwtop+i-&
                           1_${ik}$ ),si( kwtop+i-1 ), cs, sn )
                 i = i - 2_${ik}$
              end if
              go to 60
           end if
           if( ns<jw .or. s==zero ) then
              if( ns>1_${ik}$ .and. s/=zero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_${ri}$copy( ns, v, ldv, work, 1_${ik}$ )
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_${ri}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = one
                 call stdlib${ii}$_${ri}$laset( 'L', jw-2, jw-2, zero, zero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_${ri}$larf( 'L', ns, jw, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_${ri}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*v( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_${ri}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_${ri}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=zero )call stdlib${ii}$_${ri}$ormhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, h( krow, kwtop ),ldh, v, ldv, &
                           zero, wv, ldwv )
                 call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_${ri}$gemm( 'C', 'N', jw, kln, jw, one, v, ldv,h( kwtop, kcol ), ldh, &
                              zero, t, ldt )
                    call stdlib${ii}$_${ri}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_${ri}$gemm( 'N', 'N', kln, jw, jw, one, z( krow, kwtop ),ldz, v, ldv, &
                              zero, wv, ldwv )
                    call stdlib${ii}$_${ri}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
     end subroutine stdlib${ii}$_${ri}$laqr3

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! Aggressive early deflation:
     !! CLAQR3 accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(sp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(sp) :: beta, cdum, s, tau
           real(sp) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwk3, lwkopt, nmin
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_cgehrd ====
              call stdlib${ii}$_cgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_cunmhr ====
              call stdlib${ii}$_cunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_claqr4 ====
              call stdlib${ii}$_claqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, &
                        infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_clacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_ccopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_claset( 'A', jw, jw, czero, cone, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_claqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        work, lwork, infqr )
           else
              call stdlib${ii}$_clahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        infqr )
           end if
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_ctrexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_ctrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_ccopy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_clarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_claset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_clarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_clarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_clarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_cgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_clacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_ccopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_cunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_cgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_clacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_cgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_clacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
     end subroutine stdlib${ii}$_claqr3

     pure module subroutine stdlib${ii}$_zlaqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! Aggressive early deflation:
     !! ZLAQR3 accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(dp), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(dp) :: beta, cdum, s, tau
           real(dp) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwk3, lwkopt, nmin
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_zgehrd ====
              call stdlib${ii}$_zgehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_zunmhr ====
              call stdlib${ii}$_zunmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_zlaqr4 ====
              call stdlib${ii}$_zlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, &
                        infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_zlacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_zcopy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_zlaset( 'A', jw, jw, czero, cone, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_zlaqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        work, lwork, infqr )
           else
              call stdlib${ii}$_zlahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        infqr )
           end if
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_ztrexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_ztrexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_zcopy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_zlarfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_zlaset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_zlarf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_zlarf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_zlarf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_zgehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_zlacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_zcopy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_zunmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_zgemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_zlacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_zgemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_zlacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
     end subroutine stdlib${ii}$_zlaqr3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr3( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
     !! Aggressive early deflation:
     !! ZLAQR3: accepts as input an upper Hessenberg matrix
     !! H and performs an unitary similarity transformation
     !! designed to detect and deflate fully converged eigenvalues from
     !! a trailing principal submatrix.  On output H has been over-
     !! written by a new Hessenberg matrix that is a perturbation of
     !! an unitary similarity transformation of H.  It is to be
     !! hoped that the final version of H has many zero subdiagonal
     !! entries.
               ns, nd, sh, v, ldv, nh, t, ldt,nv, wv, ldwv, work, lwork )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kbot, ktop, ldh, ldt, ldv, ldwv, ldz, lwork, n,&
                      nh, nv, nw
           integer(${ik}$), intent(out) :: nd, ns
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(${ck}$), intent(out) :: sh(*), t(ldt,*), v(ldv,*), work(*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(${ck}$) :: beta, cdum, s, tau
           real(${ck}$) :: foo, safmax, safmin, smlnum, ulp
           integer(${ik}$) :: i, ifst, ilst, info, infqr, j, jw, kcol, kln, knt, krow, kwtop, ltop, &
                     lwk1, lwk2, lwk3, lwkopt, nmin
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== estimate optimal workspace. ====
           jw = min( nw, kbot-ktop+1 )
           if( jw<=2_${ik}$ ) then
              lwkopt = 1_${ik}$
           else
              ! ==== workspace query call to stdlib${ii}$_${ci}$gehrd ====
              call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, jw-1, t, ldt, work, work, -1_${ik}$, info )
              lwk1 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ci}$unmhr ====
              call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, jw, 1_${ik}$, jw-1, t, ldt, work, v, ldv,work, -1_${ik}$, info )
                        
              lwk2 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== workspace query call to stdlib${ii}$_${ci}$laqr4 ====
              call stdlib${ii}$_${ci}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh, 1_${ik}$, jw, v,ldv, work, -1_${ik}$, &
                        infqr )
              lwk3 = int( work( 1_${ik}$ ),KIND=${ik}$)
              ! ==== optimal workspace ====
              lwkopt = max( jw+max( lwk1, lwk2 ), lwk3 )
           end if
           ! ==== quick return in case of workspace query. ====
           if( lwork==-1_${ik}$ ) then
              work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
              return
           end if
           ! ==== nothing to do ...
           ! ... for an empty active block ... ====
           ns = 0_${ik}$
           nd = 0_${ik}$
           work( 1_${ik}$ ) = cone
           if( ktop>kbot )return
           ! ... nor for an empty deflation window. ====
           if( nw<1 )return
           ! ==== machine constants ====
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${ck}$) / ulp )
           ! ==== setup deflation window ====
           jw = min( nw, kbot-ktop+1 )
           kwtop = kbot - jw + 1_${ik}$
           if( kwtop==ktop ) then
              s = czero
           else
              s = h( kwtop, kwtop-1 )
           end if
           if( kbot==kwtop ) then
              ! ==== 1-by-1 deflation window: not much to do ====
              sh( kwtop ) = h( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if( cabs1( s )<=max( smlnum, ulp*cabs1( h( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if( kwtop>ktop )h( kwtop, kwtop-1 ) = czero
              end if
              work( 1_${ik}$ ) = cone
              return
           end if
           ! ==== convert to spike-triangular form.  (in case of a
           ! .    rare qr failure, this routine continues to do
           ! .    aggressive early deflation using that part of
           ! .    the deflation window that converged using infqr
           ! .    here and there to keep track.) ====
           call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, h( kwtop, kwtop ), ldh, t, ldt )
           call stdlib${ii}$_${ci}$copy( jw-1, h( kwtop+1, kwtop ), ldh+1, t( 2_${ik}$, 1_${ik}$ ), ldt+1 )
           call stdlib${ii}$_${ci}$laset( 'A', jw, jw, czero, cone, v, ldv )
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR3', 'SV', jw, 1_${ik}$, jw, lwork )
           if( jw>nmin ) then
              call stdlib${ii}$_${ci}$laqr4( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        work, lwork, infqr )
           else
              call stdlib${ii}$_${ci}$lahqr( .true., .true., jw, 1_${ik}$, jw, t, ldt, sh( kwtop ), 1_${ik}$,jw, v, ldv, &
                        infqr )
           end if
           ! ==== deflation detection loop ====
           ns = jw
           ilst = infqr + 1_${ik}$
           do knt = infqr + 1, jw
              ! ==== small spike tip deflation test ====
              foo = cabs1( t( ns, ns ) )
              if( foo==zero )foo = cabs1( s )
              if( cabs1( s )*cabs1( v( 1_${ik}$, ns ) )<=max( smlnum, ulp*foo ) )then
                 ! ==== cone more converged eigenvalue ====
                 ns = ns - 1_${ik}$
              else
                 ! ==== cone undeflatable eigenvalue.  move it up out of the
                 ! .    way.   (stdlib${ii}$_${ci}$trexc can not fail in this case.) ====
                 ifst = ns
                 call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                 ilst = ilst + 1_${ik}$
              end if
           end do
              ! ==== return to hessenberg form ====
           if( ns==0_${ik}$ )s = czero
           if( ns<jw ) then
              ! ==== sorting the diagonal of t improves accuracy for
              ! .    graded matrices.  ====
              do i = infqr + 1, ns
                 ifst = i
                 do j = i + 1, ns
                    if( cabs1( t( j, j ) )>cabs1( t( ifst, ifst ) ) )ifst = j
                 end do
                 ilst = i
                 if( ifst/=ilst )call stdlib${ii}$_${ci}$trexc( 'V', jw, t, ldt, v, ldv, ifst, ilst, info )
                           
              end do
           end if
           ! ==== restore shift/eigenvalue array from t ====
           do i = infqr + 1, jw
              sh( kwtop+i-1 ) = t( i, i )
           end do
           if( ns<jw .or. s==czero ) then
              if( ns>1_${ik}$ .and. s/=czero ) then
                 ! ==== reflect spike back into lower triangle ====
                 call stdlib${ii}$_${ci}$copy( ns, v, ldv, work, 1_${ik}$ )
                 do i = 1, ns
                    work( i ) = conjg( work( i ) )
                 end do
                 beta = work( 1_${ik}$ )
                 call stdlib${ii}$_${ci}$larfg( ns, beta, work( 2_${ik}$ ), 1_${ik}$, tau )
                 work( 1_${ik}$ ) = cone
                 call stdlib${ii}$_${ci}$laset( 'L', jw-2, jw-2, czero, czero, t( 3_${ik}$, 1_${ik}$ ), ldt )
                 call stdlib${ii}$_${ci}$larf( 'L', ns, jw, work, 1_${ik}$, conjg( tau ), t, ldt,work( jw+1 ) )
                           
                 call stdlib${ii}$_${ci}$larf( 'R', ns, ns, work, 1_${ik}$, tau, t, ldt,work( jw+1 ) )
                 call stdlib${ii}$_${ci}$larf( 'R', jw, ns, work, 1_${ik}$, tau, v, ldv,work( jw+1 ) )
                 call stdlib${ii}$_${ci}$gehrd( jw, 1_${ik}$, ns, t, ldt, work, work( jw+1 ),lwork-jw, info )
                           
              end if
              ! ==== copy updated reduced window into place ====
              if( kwtop>1_${ik}$ )h( kwtop, kwtop-1 ) = s*conjg( v( 1_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_${ci}$lacpy( 'U', jw, jw, t, ldt, h( kwtop, kwtop ), ldh )
              call stdlib${ii}$_${ci}$copy( jw-1, t( 2_${ik}$, 1_${ik}$ ), ldt+1, h( kwtop+1, kwtop ),ldh+1 )
              ! ==== accumulate orthogonal matrix in order update
              ! .    h and z, if requested.  ====
              if( ns>1_${ik}$ .and. s/=czero )call stdlib${ii}$_${ci}$unmhr( 'R', 'N', jw, ns, 1_${ik}$, ns, t, ldt, work, &
                        v, ldv,work( jw+1 ), lwork-jw, info )
              ! ==== update vertical slab in h ====
              if( wantt ) then
                 ltop = 1_${ik}$
              else
                 ltop = ktop
              end if
              do krow = ltop, kwtop - 1, nv
                 kln = min( nv, kwtop-krow )
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, h( krow, kwtop ),ldh, v, ldv, &
                           czero, wv, ldwv )
                 call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, h( krow, kwtop ), ldh )
              end do
              ! ==== update horizontal slab in h ====
              if( wantt ) then
                 do kcol = kbot + 1, n, nh
                    kln = min( nh, n-kcol+1 )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, kln, jw, cone, v, ldv,h( kwtop, kcol ), ldh, &
                              czero, t, ldt )
                    call stdlib${ii}$_${ci}$lacpy( 'A', jw, kln, t, ldt, h( kwtop, kcol ),ldh )
                 end do
              end if
              ! ==== update vertical slab in z ====
              if( wantz ) then
                 do krow = iloz, ihiz, nv
                    kln = min( nv, ihiz-krow+1 )
                    call stdlib${ii}$_${ci}$gemm( 'N', 'N', kln, jw, jw, cone, z( krow, kwtop ),ldz, v, ldv, &
                              czero, wv, ldwv )
                    call stdlib${ii}$_${ci}$lacpy( 'A', kln, jw, wv, ldwv, z( krow, kwtop ),ldz )
                 end do
              end if
           end if
           ! ==== return the number of deflations ... ====
           nd = jw - ns
           ! ==== ... and the number of shifts. (subtracting
           ! .    infqr from the spike length takes care
           ! .    of the case of a rare qr failure while
           ! .    calculating eigenvalues of the deflation
           ! .    window.)  ====
           ns = ns - infqr
            ! ==== return optimal workspace. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
     end subroutine stdlib${ii}$_${ci}$laqr3

#:endif
#:endfor



     module subroutine stdlib${ii}$_slaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! SLAQR4 implements one level of recursion for SLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by SLAQR0 and, for large enough
     !! deflation window size, it may be called by SLAQR3.  This
     !! subroutine is identical to SLAQR0 except that it calls SLAQR2
     !! instead of SLAQR3.
     !! SLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(sp), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(sp), parameter :: wilk1 = 0.75_sp
           real(sp), parameter :: wilk2 = -0.4375_sp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_slahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(sp) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(sp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_slahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_slahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_slaqr2 ====
              call stdlib${ii}$_slaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_slaqr5, stdlib${ii}$_slaqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
                 return
              end if
              ! ==== stdlib${ii}$_slahqr/stdlib${ii}$_slaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'SLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_slaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_slaqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_slaqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_slahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_slacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_slahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks &
                                    ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_slanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=sp)
     end subroutine stdlib${ii}$_slaqr4

     module subroutine stdlib${ii}$_dlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! DLAQR4 implements one level of recursion for DLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by DLAQR0 and, for large enough
     !! deflation window size, it may be called by DLAQR3.  This
     !! subroutine is identical to DLAQR0 except that it calls DLAQR2
     !! instead of DLAQR3.
     !! DLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           real(dp), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(dp), parameter :: wilk1 = 0.75_dp
           real(dp), parameter :: wilk2 = -0.4375_dp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_dlahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(dp) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(dp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_dlahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_dlahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_dlaqr2 ====
              call stdlib${ii}$_dlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_dlaqr5, stdlib${ii}$_dlaqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
                 return
              end if
              ! ==== stdlib${ii}$_dlahqr/stdlib${ii}$_dlaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_dlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_dlaqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_dlaqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_dlahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_dlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_dlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks &
                                    ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_dlanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=dp)
     end subroutine stdlib${ii}$_dlaqr4

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     module subroutine stdlib${ii}$_${ri}$laqr4( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, ihiz, z, ldz, work,&
     !! DLAQR4: implements one level of recursion for DLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by DLAQR0 and, for large enough
     !! deflation window size, it may be called by DLAQR3.  This
     !! subroutine is identical to DLAQR0 except that it calls DLAQR2
     !! instead of DLAQR3.
     !! DLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**T, where T is an upper quasi-triangular matrix (the
     !! Schur form), and Z is the orthogonal matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input orthogonal
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the orthogonal matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), z(ldz,*)
           real(${rk}$), intent(out) :: wi(*), work(*), wr(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(${rk}$), parameter :: wilk1 = 0.75_${rk}$
           real(${rk}$), parameter :: wilk2 = -0.4375_${rk}$
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_${ri}$lahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constants wilk1 and wilk2 are used to form the
           ! .    exceptional shifts. ====
           
           
           ! Local Scalars 
           real(${rk}$) :: aa, bb, cc, cs, dd, sn, ss, swap
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2_${ik}$) :: jbcmpz
           ! Local Arrays 
           real(${rk}$) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = one
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_${ri}$lahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ri}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, wr, wi,iloz, &
                        ihiz, z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_${ri}$laqr2 ====
              call stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, wr, wi, h, ldh, n, h, ldh,n, h, ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_${ri}$laqr5, stdlib${ii}$_${ri}$laqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
                 return
              end if
              ! ==== stdlib${ii}$_${ri}$lahqr/stdlib${ii}$_${ri}$laqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'DLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_80: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 90
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==zero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( abs( h( kwtop, kwtop-1 ) )>abs( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_${ri}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, wr, wi, h( kv, 1_${ik}$ ), ldh,nho, h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,&
                           work, lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_${ri}$laqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_${ri}$laqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, max( ks+1, ktop+2 ), -2
                          ss = abs( h( i, i-1 ) ) + abs( h( i-1, i-2 ) )
                          aa = wilk1*ss + h( i, i )
                          bb = ss
                          cc = wilk2*ss
                          dd = aa
                          call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( i-1 ), wi( i-1 ),wr( i ), wi( i &
                                    ), cs, sn )
                       end do
                       if( ks==ktop ) then
                          wr( ks+1 ) = h( ks+1, ks+1 )
                          wi( ks+1 ) = zero
                          wr( ks ) = wr( ks+1 )
                          wi( ks ) = wi( ks+1 )
                       end if
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_${ri}$lahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_${ri}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_${ri}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, wr( ks &
                                    ), wi( ks ),1_${ik}$, 1_${ik}$, zdum, 1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  ====
                          if( ks>=kbot ) then
                             aa = h( kbot-1, kbot-1 )
                             cc = h( kbot, kbot-1 )
                             bb = h( kbot-1, kbot )
                             dd = h( kbot, kbot )
                             call stdlib${ii}$_${ri}$lanv2( aa, bb, cc, dd, wr( kbot-1 ),wi( kbot-1 ), wr( &
                                       kbot ),wi( kbot ), cs, sn )
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little)
                          ! .    bubble sort keeps complex conjugate
                          ! .    pairs together. ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( abs( wr( i ) )+abs( wi( i ) )<abs( wr( i+1 ) )+abs( wi( i+1 ) &
                                          ) ) then
                                   sorted = .false.
                                   swap = wr( i )
                                   wr( i ) = wr( i+1 )
                                   wr( i+1 ) = swap
                                   swap = wi( i )
                                   wi( i ) = wi( i+1 )
                                   wi( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                       ! ==== shuffle shifts into pairs of real shifts
                       ! .    and pairs of complex conjugate shifts
                       ! .    assuming complex conjugate shifts are
                       ! .    already adjacent to one another. (yes,
                       ! .    they are.)  ====
                       do i = kbot, ks + 2, -2
                          if( wi( i )/=-wi( i-1 ) ) then
                             swap = wr( i )
                             wr( i ) = wr( i-1 )
                             wr( i-1 ) = wr( i-2 )
                             wr( i-2 ) = swap
                             swap = wi( i )
                             wi( i ) = wi( i-1 )
                             wi( i-1 ) = wi( i-2 )
                             wi( i-2 ) = swap
                          end if
                       end do
                    end if
                    ! ==== if there are only two shifts and both are
                    ! .    real, then use only one.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( wi( kbot )==zero ) then
                          if( abs( wr( kbot )-h( kbot, kbot ) )<abs( wr( kbot-1 )-h( kbot, kbot ) &
                                    ) ) then
                             wr( kbot-1 ) = wr( kbot )
                          else
                             wr( kbot ) = wr( kbot-1 )
                          end if
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping one to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,wr( ks ), wi( ks )&
                    , h, ldh, iloz, ihiz, z,ldz, work, 3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve,h( kwv, 1_${ik}$ ), ldh, &
                              nho, h( ku, kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_80
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              90 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = real( lwkopt,KIND=${rk}$)
     end subroutine stdlib${ii}$_${ri}$laqr4

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! CLAQR4 implements one level of recursion for CLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by CLAQR0 and, for large enough
     !! deflation window size, it may be called by CLAQR3.  This
     !! subroutine is identical to CLAQR0 except that it calls CLAQR2
     !! instead of CLAQR3.
     !! CLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(sp), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(sp), parameter :: wilk1 = 0.75_sp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_clahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(sp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(sp) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(sp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_clahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_clahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_claqr2 ====
              call stdlib${ii}$_claqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_claqr5, stdlib${ii}$_claqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
                 return
              end if
              ! ==== stdlib${ii}$_clahqr/stdlib${ii}$_claqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'CLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_claqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_claqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_claqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_clahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_clacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_clahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )&
                                    , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=sp)
     end subroutine stdlib${ii}$_claqr4

     pure module subroutine stdlib${ii}$_zlaqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! ZLAQR4 implements one level of recursion for ZLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by ZLAQR0 and, for large enough
     !! deflation window size, it may be called by ZLAQR3.  This
     !! subroutine is identical to ZLAQR0 except that it calls ZLAQR2
     !! instead of ZLAQR3.
     !! ZLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(dp), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(dp), parameter :: wilk1 = 0.75_dp
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_zlahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(dp) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(dp) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(dp) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_zlahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_zlahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_zlaqr2 ====
              call stdlib${ii}$_zlaqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_zlaqr5, stdlib${ii}$_zlaqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
                 return
              end if
              ! ==== stdlib${ii}$_zlahqr/stdlib${ii}$_zlaqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_zlaqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_zlaqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_zlaqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_zlahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_zlacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_zlahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )&
                                    , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=dp)
     end subroutine stdlib${ii}$_zlaqr4

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr4( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, z, ldz, work,&
     !! ZLAQR4: implements one level of recursion for ZLAQR0.
     !! It is a complete implementation of the small bulge multi-shift
     !! QR algorithm.  It may be called by ZLAQR0 and, for large enough
     !! deflation window size, it may be called by ZLAQR3.  This
     !! subroutine is identical to ZLAQR0 except that it calls ZLAQR2
     !! instead of ZLAQR3.
     !! ZLAQR4 computes the eigenvalues of a Hessenberg matrix H
     !! and, optionally, the matrices T and Z from the Schur decomposition
     !! H = Z T Z**H, where T is an upper triangular matrix (the
     !! Schur form), and Z is the unitary matrix of Schur vectors.
     !! Optionally Z may be postmultiplied into an input unitary
     !! matrix Q so that this routine can give the Schur factorization
     !! of a matrix A which has been reduced to the Hessenberg form H
     !! by the unitary matrix Q:  A = Q*H*Q**H = (QZ)*H*(QZ)**H.
                lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihi, ihiz, ilo, iloz, ldh, ldz, lwork, n
           integer(${ik}$), intent(out) :: info
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), z(ldz,*)
           complex(${ck}$), intent(out) :: w(*), work(*)
        ! ================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ntiny = 15_${ik}$
           integer(${ik}$), parameter :: kexnw = 5_${ik}$
           integer(${ik}$), parameter :: kexsh = 6_${ik}$
           real(${ck}$), parameter :: wilk1 = 0.75_${ck}$
           ! ==== matrices of order ntiny or smaller must be processed by
           ! .    stdlib${ii}$_${ci}$lahqr because of insufficient subdiagonal scratch space.
           ! .    (this is a hard limit.) ====
           
           ! ==== exceptional deflation windows:  try to cure rare
           ! .    slow convergence by varying the size of the
           ! .    deflation window after kexnw iterations. ====
           
           ! ==== exceptional shifts: try to cure rare slow convergence
           ! .    with ad-hoc exceptional shifts every kexsh iterations.
           ! .    ====
           
           ! ==== the constant wilk1 is used to form the exceptional
           ! .    shifts. ====
           
           
           
           ! Local Scalars 
           complex(${ck}$) :: aa, bb, cc, cdum, dd, det, rtdisc, swap, tr2
           real(${ck}$) :: s
           integer(${ik}$) :: i, inf, it, itmax, k, kacc22, kbot, kdu, ks, kt, ktop, ku, kv, kwh, &
           kwtop, kwv, ld, ls, lwkopt, ndec, ndfl, nh, nho, nibble, nmin, ns, nsmax, nsr, nve, nw,&
                      nwmax, nwr, nwupbd
           logical(lk) :: sorted
           character(len=2) :: jbcmpz
           ! Local Arrays 
           complex(${ck}$) :: zdum(1_${ik}$,1_${ik}$)
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           info = 0_${ik}$
           ! ==== quick return for n = 0: nothing to do. ====
           if( n==0_${ik}$ ) then
              work( 1_${ik}$ ) = cone
              return
           end if
           if( n<=ntiny ) then
              ! ==== tiny matrices must use stdlib_${ci}$lahqr. ====
              lwkopt = 1_${ik}$
              if( lwork/=-1_${ik}$ )call stdlib${ii}$_${ci}$lahqr( wantt, wantz, n, ilo, ihi, h, ldh, w, iloz,ihiz, &
                        z, ldz, info )
           else
              ! ==== use small bulge multi-shift qr with aggressive early
              ! .    deflation on larger-than-tiny matrices. ====
              ! ==== hope for the best. ====
              info = 0_${ik}$
              ! ==== set up job flags for stdlib${ii}$_ilaenv. ====
              if( wantt ) then
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'S'
              else
                 jbcmpz( 1_${ik}$: 1_${ik}$ ) = 'E'
              end if
              if( wantz ) then
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'V'
              else
                 jbcmpz( 2_${ik}$: 2_${ik}$ ) = 'N'
              end if
              ! ==== nwr = recommended deflation window size.  at this
              ! .    point,  n > ntiny = 15, so there is enough
              ! .    subdiagonal workspace for nwr>=2 as required.
              ! .    (in fact, there is enough subdiagonal space for
              ! .    nwr>=4.) ====
              nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nwr = max( 2_${ik}$, nwr )
              nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
              ! ==== nsr = recommended number of simultaneous shifts.
              ! .    at this point n > ntiny = 15, so there is at
              ! .    enough subdiagonal workspace for nsr to be even
              ! .    and greater than or equal to two as required. ====
              nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nsr = min( nsr, ( n-3 ) / 6_${ik}$, ihi-ilo )
              nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
              ! ==== estimate optimal workspace ====
              ! ==== workspace query call to stdlib${ii}$_${ci}$laqr2 ====
              call stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz,ihiz, z, ldz, ls,&
                         ld, w, h, ldh, n, h, ldh, n, h,ldh, work, -1_${ik}$ )
              ! ==== optimal workspace = max(stdlib${ii}$_${ci}$laqr5, stdlib${ii}$_${ci}$laqr2) ====
              lwkopt = max( 3_${ik}$*nsr / 2_${ik}$, int( work( 1_${ik}$ ),KIND=${ik}$) )
              ! ==== quick return in case of workspace query. ====
              if( lwork==-1_${ik}$ ) then
                 work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
                 return
              end if
              ! ==== stdlib${ii}$_${ci}$lahqr/stdlib${ii}$_${ci}$laqr0 crossover point ====
              nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nmin = max( ntiny, nmin )
              ! ==== nibble crossover point ====
              nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              nibble = max( 0_${ik}$, nibble )
              ! ==== accumulate reflections during ttswp?  use block
              ! .    2-by-2 structure during matrix-matrix multiply? ====
              kacc22 = stdlib${ii}$_ilaenv( 16_${ik}$, 'ZLAQR4', jbcmpz, n, ilo, ihi, lwork )
              kacc22 = max( 0_${ik}$, kacc22 )
              kacc22 = min( 2_${ik}$, kacc22 )
              ! ==== nwmax = the largest possible deflation window for
              ! .    which there is sufficient workspace. ====
              nwmax = min( ( n-1 ) / 3_${ik}$, lwork / 2_${ik}$ )
              nw = nwmax
              ! ==== nsmax = the largest number of simultaneous shifts
              ! .    for which there is sufficient workspace. ====
              nsmax = min( ( n-3 ) / 6_${ik}$, 2_${ik}$*lwork / 3_${ik}$ )
              nsmax = nsmax - mod( nsmax, 2_${ik}$ )
              ! ==== ndfl: an iteration count restarted at deflation. ====
              ndfl = 1_${ik}$
              ! ==== itmax = iteration limit ====
              itmax = max( 30_${ik}$, 2_${ik}$*kexsh )*max( 10_${ik}$, ( ihi-ilo+1 ) )
              ! ==== last row and column in the active block ====
              kbot = ihi
              ! ==== main loop ====
              loop_70: do it = 1, itmax
                 ! ==== done when kbot falls below ilo ====
                 if( kbot<ilo )go to 80
                 ! ==== locate active block ====
                 do k = kbot, ilo + 1, -1
                    if( h( k, k-1 )==czero )go to 20
                 end do
                 k = ilo
                 20 continue
                 ktop = k
                 ! ==== select deflation window size:
                 ! .    typical case:
                 ! .      if possible and advisable, nibble the entire
                 ! .      active block.  if not, use size min(nwr,nwmax)
                 ! .      or min(nwr+1,nwmax) depending upon which has
                 ! .      the smaller corresponding subdiagonal entry
                 ! .      (a heuristic).
                 ! .    exceptional case:
                 ! .      if there have been no deflations in kexnw or
                 ! .      more iterations, then vary the deflation window
                 ! .      size.   at first, because, larger windows are,
                 ! .      in general, more powerful than smaller ones,
                 ! .      rapidly increase the window to the maximum possible.
                 ! .      then, gradually reduce the window size. ====
                 nh = kbot - ktop + 1_${ik}$
                 nwupbd = min( nh, nwmax )
                 if( ndfl<kexnw ) then
                    nw = min( nwupbd, nwr )
                 else
                    nw = min( nwupbd, 2_${ik}$*nw )
                 end if
                 if( nw<nwmax ) then
                    if( nw>=nh-1 ) then
                       nw = nh
                    else
                       kwtop = kbot - nw + 1_${ik}$
                       if( cabs1( h( kwtop, kwtop-1 ) )>cabs1( h( kwtop-1, kwtop-2 ) ) )nw = nw + &
                                 1_${ik}$
                    end if
                 end if
                 if( ndfl<kexnw ) then
                    ndec = -1_${ik}$
                 else if( ndec>=0_${ik}$ .or. nw>=nwupbd ) then
                    ndec = ndec + 1_${ik}$
                    if( nw-ndec<2_${ik}$ )ndec = 0_${ik}$
                    nw = nw - ndec
                 end if
                 ! ==== aggressive early deflation:
                 ! .    split workspace under the subdiagonal into
                 ! .      - an nw-by-nw work array v in the lower
                 ! .        left-hand-corner,
                 ! .      - an nw-by-at-least-nw-but-more-is-better
                 ! .        (nw-by-nho) horizontal work array along
                 ! .        the bottom edge,
                 ! .      - an at-least-nw-but-more-is-better (nhv-by-nw)
                 ! .        vertical work array along the left-hand-edge.
                 ! .        ====
                 kv = n - nw + 1_${ik}$
                 kt = nw + 1_${ik}$
                 nho = ( n-nw-1 ) - kt + 1_${ik}$
                 kwv = nw + 2_${ik}$
                 nve = ( n-nw ) - kwv + 1_${ik}$
                 ! ==== aggressive early deflation ====
                 call stdlib${ii}$_${ci}$laqr2( wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz,ihiz, z, ldz, &
                 ls, ld, w, h( kv, 1_${ik}$ ), ldh, nho,h( kv, kt ), ldh, nve, h( kwv, 1_${ik}$ ), ldh, work,&
                           lwork )
                 ! ==== adjust kbot accounting for new deflations. ====
                 kbot = kbot - ld
                 ! ==== ks points to the shifts. ====
                 ks = kbot - ls + 1_${ik}$
                 ! ==== skip an expensive qr sweep if there is a (partly
                 ! .    heuristic) reason to expect that many eigenvalues
                 ! .    will deflate without it.  here, the qr sweep is
                 ! .    skipped if many eigenvalues have just been deflated
                 ! .    or if the remaining active block is small.
                 if( ( ld==0_${ik}$ ) .or. ( ( 100_${ik}$*ld<=nw*nibble ) .and. ( kbot-ktop+1>min( nmin, nwmax )&
                            ) ) ) then
                    ! ==== ns = nominal number of simultaneous shifts.
                    ! .    this may be lowered (slightly) if stdlib${ii}$_${ci}$laqr2
                    ! .    did not provide that many shifts. ====
                    ns = min( nsmax, nsr, max( 2_${ik}$, kbot-ktop ) )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ! ==== if there have been no deflations
                    ! .    in a multiple of kexsh iterations,
                    ! .    then try exceptional shifts.
                    ! .    otherwise use shifts provided by
                    ! .    stdlib${ii}$_${ci}$laqr2 above or from the eigenvalues
                    ! .    of a trailing principal submatrix. ====
                    if( mod( ndfl, kexsh )==0_${ik}$ ) then
                       ks = kbot - ns + 1_${ik}$
                       do i = kbot, ks + 1, -2
                          w( i ) = h( i, i ) + wilk1*cabs1( h( i, i-1 ) )
                          w( i-1 ) = w( i )
                       end do
                    else
                       ! ==== got ns/2 or fewer shifts? use stdlib_${ci}$lahqr
                       ! .    on a trailing principal submatrix to
                       ! .    get more. (since ns<=nsmax<=(n-3)/6,
                       ! .    there is enough space below the subdiagonal
                       ! .    to fit an ns-by-ns scratch array.) ====
                       if( kbot-ks+1<=ns / 2_${ik}$ ) then
                          ks = kbot - ns + 1_${ik}$
                          kt = n - ns + 1_${ik}$
                          call stdlib${ii}$_${ci}$lacpy( 'A', ns, ns, h( ks, ks ), ldh,h( kt, 1_${ik}$ ), ldh )
                                    
                          call stdlib${ii}$_${ci}$lahqr( .false., .false., ns, 1_${ik}$, ns,h( kt, 1_${ik}$ ), ldh, w( ks )&
                                    , 1_${ik}$, 1_${ik}$, zdum,1_${ik}$, inf )
                          ks = ks + inf
                          ! ==== in case of a rare qr failure use
                          ! .    eigenvalues of the trailing 2-by-2
                          ! .    principal submatrix.  scale to avoid
                          ! .    overflows, underflows and subnormals.
                          ! .    (the scale factor s can not be czero,
                          ! .    because h(kbot,kbot-1) is nonzero.) ====
                          if( ks>=kbot ) then
                             s = cabs1( h( kbot-1, kbot-1 ) ) +cabs1( h( kbot, kbot-1 ) ) +cabs1( &
                                       h( kbot-1, kbot ) ) +cabs1( h( kbot, kbot ) )
                             aa = h( kbot-1, kbot-1 ) / s
                             cc = h( kbot, kbot-1 ) / s
                             bb = h( kbot-1, kbot ) / s
                             dd = h( kbot, kbot ) / s
                             tr2 = ( aa+dd ) / two
                             det = ( aa-tr2 )*( dd-tr2 ) - bb*cc
                             rtdisc = sqrt( -det )
                             w( kbot-1 ) = ( tr2+rtdisc )*s
                             w( kbot ) = ( tr2-rtdisc )*s
                             ks = kbot - 1_${ik}$
                          end if
                       end if
                       if( kbot-ks+1>ns ) then
                          ! ==== sort the shifts (helps a little) ====
                          sorted = .false.
                          do k = kbot, ks + 1, -1
                             if( sorted )go to 60
                             sorted = .true.
                             do i = ks, k - 1
                                if( cabs1( w( i ) )<cabs1( w( i+1 ) ) )then
                                   sorted = .false.
                                   swap = w( i )
                                   w( i ) = w( i+1 )
                                   w( i+1 ) = swap
                                end if
                             end do
                          end do
                          60 continue
                       end if
                    end if
                    ! ==== if there are only two shifts, then use
                    ! .    only cone.  ====
                    if( kbot-ks+1==2_${ik}$ ) then
                       if( cabs1( w( kbot )-h( kbot, kbot ) )<cabs1( w( kbot-1 )-h( kbot, kbot ) )&
                                  ) then
                          w( kbot-1 ) = w( kbot )
                       else
                          w( kbot ) = w( kbot-1 )
                       end if
                    end if
                    ! ==== use up to ns of the the smallest magnitude
                    ! .    shifts.  if there aren't ns shifts available,
                    ! .    then use them all, possibly dropping cone to
                    ! .    make the number of shifts even. ====
                    ns = min( ns, kbot-ks+1 )
                    ns = ns - mod( ns, 2_${ik}$ )
                    ks = kbot - ns + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep:
                    ! .    split workspace under the subdiagonal into
                    ! .    - a kdu-by-kdu work array u in the lower
                    ! .      left-hand-corner,
                    ! .    - a kdu-by-at-least-kdu-but-more-is-better
                    ! .      (kdu-by-nho) horizontal work array wh along
                    ! .      the bottom edge,
                    ! .    - and an at-least-kdu-but-more-is-better-by-kdu
                    ! .      (nve-by-kdu) vertical work wv arrow along
                    ! .      the left-hand-edge. ====
                    kdu = 2_${ik}$*ns
                    ku = n - kdu + 1_${ik}$
                    kwh = kdu + 1_${ik}$
                    nho = ( n-kdu+1-4 ) - ( kdu+1 ) + 1_${ik}$
                    kwv = kdu + 4_${ik}$
                    nve = n - kdu - kwv + 1_${ik}$
                    ! ==== small-bulge multi-shift qr sweep ====
                    call stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, ns,w( ks ), h, ldh, &
                    iloz, ihiz, z, ldz, work,3_${ik}$, h( ku, 1_${ik}$ ), ldh, nve, h( kwv, 1_${ik}$ ), ldh,nho, h( ku,&
                               kwh ), ldh )
                 end if
                 ! ==== note progress (or the lack of it). ====
                 if( ld>0_${ik}$ ) then
                    ndfl = 1_${ik}$
                 else
                    ndfl = ndfl + 1_${ik}$
                 end if
                 ! ==== end of main loop ====
              end do loop_70
              ! ==== iteration limit exceeded.  set info to show where
              ! .    the problem occurred and exit. ====
              info = kbot
              80 continue
           end if
           ! ==== return the optimal value of lwork. ====
           work( 1_${ik}$ ) = cmplx( lwkopt, 0_${ik}$,KIND=${ck}$)
     end subroutine stdlib${ii}$_${ci}$laqr4

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, &
     !! SLAQR5 , called by SLAQR0, performs a
     !! single small-bulge multi-shift QR sweep.
               iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(sp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*)
           real(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(sp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,&
                      tst1, tst2, ulp
           integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, &
                     krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           real(sp) :: vt(3_${ik}$)
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== shuffle shifts into pairs of real shifts and pairs
           ! .    of complex conjugate shifts assuming complex
           ! .    conjugate shifts are already adjacent to one
           ! .    another. ====
           do i = 1, nshfts - 2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
              end if
           end do
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by one.  the shuffle above
           ! .    ensures that the dropped shift is real and that
           ! .    the remaining shifts are paired. ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = zero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps-1 columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    one column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_slaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(&
                                  2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_slarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_slarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = zero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop ) then
                       if( h( k+1, k )/=zero ) then
                          tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                          end if
                          if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                             h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                       then
                                h( k+1, k ) = zero
                             end if
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_slaqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_slarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually zero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m )
                       h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m )
                       ! ==== calculate reflection to move
                       ! .    mth bulge one step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_slarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) &
                                 then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = zero
                          h( k+3, k ) = zero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new one. ====
                          call stdlib${ii}$_slaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                    2_${ik}$*m ), si( 2_${ik}$*m ),vt )
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_slarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) )
                          if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( &
                                    h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old one with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new one. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m )
                       h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, &
                              k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=zero ) then
                       tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                       end if
                       if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                    then
                             h( k+1, k ) = zero
                          end if
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m )
                          u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m )
                          z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, &
                              jcol ), ldh, zero, wh,ldwh )
                    call stdlib${ii}$_slacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_sgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, zero, wv, ldwv )
                    call stdlib${ii}$_slacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_sgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(&
                                  k1, k1 ),ldu, zero, wv, ldwv )
                       call stdlib${ii}$_slacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_slaqr5

     pure module subroutine stdlib${ii}$_dlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, &
     !! DLAQR5 , called by DLAQR0, performs a
     !! single small-bulge multi-shift QR sweep.
               iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(dp), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*)
           real(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(dp) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,&
                      tst1, tst2, ulp
           integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, &
                     krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           real(dp) :: vt(3_${ik}$)
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== shuffle shifts into pairs of real shifts and pairs
           ! .    of complex conjugate shifts assuming complex
           ! .    conjugate shifts are already adjacent to one
           ! .    another. ====
           do i = 1, nshfts - 2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
              end if
           end do
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by one.  the shuffle above
           ! .    ensures that the dropped shift is real and that
           ! .    the remaining shifts are paired. ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = zero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    one column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_dlaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(&
                                  2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_dlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_dlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = zero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop ) then
                       if( h( k+1, k )/=zero ) then
                          tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                          end if
                          if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then
                             h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                       then
                                h( k+1, k ) = zero
                             end if
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_dlaqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_dlarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually zero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m )
                       h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m )
                       ! ==== calculate reflection to move
                       ! .    mth bulge one step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_dlarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) &
                                 then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = zero
                          h( k+3, k ) = zero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new one. ====
                          call stdlib${ii}$_dlaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                    2_${ik}$*m ), si( 2_${ik}$*m ),vt )
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_dlarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) )
                          if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( &
                                    h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old one with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new one. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m )
                       h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, &
                              k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=zero ) then
                       tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                       end if
                       if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                    then
                             h( k+1, k ) = zero
                          end if
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m )
                          u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m )
                          z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, &
                              jcol ), ldh, zero, wh,ldwh )
                    call stdlib${ii}$_dlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_dgemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, zero, wv, ldwv )
                    call stdlib${ii}$_dlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_dgemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(&
                                  k1, k1 ),ldu, zero, wv, ldwv )
                       call stdlib${ii}$_dlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_dlaqr5

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts,sr, si, h, ldh, &
     !! DLAQR5:, called by DLAQR0, performs a
     !! single small-bulge multi-shift QR sweep.
               iloz, ihiz, z, ldz, v, ldv, u,ldu, nv, wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           real(${rk}$), intent(inout) :: h(ldh,*), si(*), sr(*), z(ldz,*)
           real(${rk}$), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           
           ! Local Scalars 
           real(${rk}$) :: alpha, beta, h11, h12, h21, h22, refsum, safmax, safmin, scl, smlnum, swap,&
                      tst1, tst2, ulp
           integer(${ik}$) :: i, i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, &
                     krcol, m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           real(${rk}$) :: vt(3_${ik}$)
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== shuffle shifts into pairs of real shifts and pairs
           ! .    of complex conjugate shifts assuming complex
           ! .    conjugate shifts are already adjacent to one
           ! .    another. ====
           do i = 1, nshfts - 2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
              end if
           end do
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by one.  the shuffle above
           ! .    ensures that the dropped shift is real and that
           ! .    the remaining shifts are paired. ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${rk}$) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = zero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_${ri}$laset( 'ALL', kdu, kdu, zero, one, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    one column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_${ri}$laqr1( 2_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m22-1 ),si( 2_${ik}$*m22-1 ), sr(&
                                  2_${ik}$*m22 ), si( 2_${ik}$*m22 ),v( 1_${ik}$, m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_${ri}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_${ri}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = zero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = v( 1_${ik}$, m22 )*( h( k+1, j )+v( 2_${ik}$, m22 )*h( k+2, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop ) then
                       if( h( k+1, k )/=zero ) then
                          tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                          end if
                          if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then
                             h12 = max( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h21 = min( abs( h( k+1, k ) ),abs( h( k, k+1 ) ) )
                             h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                       then
                                h( k+1, k ) = zero
                             end if
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m22 )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_${ri}$laqr1( 3_${ik}$, h( ktop, ktop ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                 2_${ik}$*m ), si( 2_${ik}$*m ),v( 1_${ik}$, m ) )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_${ri}$larfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually zero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*v( 2_${ik}$, m )
                       h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3_${ik}$, m )
                       ! ==== calculate reflection to move
                       ! .    mth bulge one step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_${ri}$larfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=zero .or. h( k+3, k+1 )/=zero .or. h( k+3, k+2 )==zero ) &
                                 then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = zero
                          h( k+3, k ) = zero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new one. ====
                          call stdlib${ii}$_${ri}$laqr1( 3_${ik}$, h( k+1, k+1 ), ldh, sr( 2_${ik}$*m-1 ),si( 2_${ik}$*m-1 ), sr( &
                                    2_${ik}$*m ), si( 2_${ik}$*m ),vt )
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_${ri}$larfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = vt( 1_${ik}$ )*( h( k+1, k )+vt( 2_${ik}$ )*h( k+2, k ) )
                          if( abs( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+abs( refsum*vt( 3_${ik}$ ) )>ulp*( abs( &
                                    h( k, k ) )+abs( h( k+1,k+1 ) )+abs( h( k+2, k+2 ) ) ) ) then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old one with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new one. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = zero
                             h( k+3, k ) = zero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2_${ik}$, m )
                       h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3_${ik}$, m )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum = v( 1_${ik}$, m )*( h( k+1, k+1 )+v( 2_${ik}$, m )*h( k+2, k+1 )+v( 3_${ik}$, m )*h( k+3, &
                              k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is zero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=zero ) then
                       tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + abs( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + abs( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + abs( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + abs( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + abs( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + abs( h( k+4, k+1 ) )
                       end if
                       if( abs( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                          h11 = max( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          h22 = min( abs( h( k+1, k+1 ) ),abs( h( k, k )-h( k+1, k+1 ) ) )
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) ) &
                                    then
                             h( k+1, k ) = zero
                          end if
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = v( 1_${ik}$, m )*( h( k+1, j )+v( 2_${ik}$, m )*h( k+2, j )+v( 3_${ik}$, m )*h( k+3, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2_${ik}$, m )
                          u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2_${ik}$, m )
                          z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3_${ik}$, m )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_${ri}$gemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),ldu, h( incol+k1, &
                              jcol ), ldh, zero, wh,ldwh )
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_${ri}$gemm( 'N', 'N', jlen, nu, nu, one,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, zero, wv, ldwv )
                    call stdlib${ii}$_${ri}$lacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_${ri}$gemm( 'N', 'N', jlen, nu, nu, one,z( jrow, incol+k1 ), ldz, u(&
                                  k1, k1 ),ldu, zero, wv, ldwv )
                       call stdlib${ii}$_${ri}$lacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_${ri}$laqr5

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, &
     !! CLAQR5 called by CLAQR0 performs a
     !! single small-bulge multi-shift QR sweep.
               ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(sp), intent(inout) :: h(ldh,*), s(*), z(ldz,*)
           complex(sp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(sp) :: alpha, beta, cdum, refsum
           real(sp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp
           integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,&
                      m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           complex(sp) :: vt(3_${ik}$)
           ! Statement Functions 
           real(sp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=sp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by cone.  ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = czero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_claset( 'ALL', kdu, kdu, czero, cone, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    cone column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_claqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, &
                                 m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_clarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_clarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = czero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop) then
                       if( h( k+1, k )/=czero ) then
                          tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                          end if
                          if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then
                             h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( &
                                       k+1, k ) = czero
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_claqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )&
                                  )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_clarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually czero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) )
                       h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       ! ==== calculate reflection to move
                       ! .    mth bulge cone step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_clarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero &
                                 ) then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = czero
                          h( k+3, k ) = czero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new cone. ====
                          call stdlib${ii}$_claqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt )
                                    
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_clarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) )
                                    
                          if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( &
                          cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) &
                                    then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old cone with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new cone. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                       h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum =  conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+&
                              conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=czero ) then
                       tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                       end if
                       if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,&
                                     k ) = czero
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+&
                                 conjg( v( 3_${ik}$, m ) )*h( k+3, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_cgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,&
                               jcol ), ldh, czero, wh,ldwh )
                    call stdlib${ii}$_clacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_cgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, czero, wv, ldwv )
                    call stdlib${ii}$_clacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_cgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, &
                                 u( k1, k1 ),ldu, czero, wv, ldwv )
                       call stdlib${ii}$_clacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_claqr5

     pure module subroutine stdlib${ii}$_zlaqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, &
     !! ZLAQR5 , called by ZLAQR0, performs a
     !! single small-bulge multi-shift QR sweep.
               ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(dp), intent(inout) :: h(ldh,*), s(*), z(ldz,*)
           complex(dp), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(dp) :: alpha, beta, cdum, refsum
           real(dp) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp
           integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,&
                      m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           complex(dp) :: vt(3_${ik}$)
           ! Statement Functions 
           real(dp) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=dp) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by cone.  ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = czero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_zlaset( 'ALL', kdu, kdu, czero, cone, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    cone column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_zlaqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, &
                                 m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_zlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_zlarfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = czero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop ) then
                       if( h( k+1, k )/=czero ) then
                          tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                          end if
                          if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then
                             h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( &
                                       k+1, k ) = czero
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_zlaqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )&
                                  )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_zlarfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually czero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) )
                       h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       ! ==== calculate reflection to move
                       ! .    mth bulge cone step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_zlarfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero &
                                 ) then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = czero
                          h( k+3, k ) = czero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new cone. ====
                          call stdlib${ii}$_zlaqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt )
                                    
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_zlarfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) )
                                    
                          if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( &
                          cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) &
                                    then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old cone with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new cone. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                       h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum =  conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+&
                              conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=czero ) then
                       tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                       end if
                       if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,&
                                     k ) = czero
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+&
                                 conjg( v( 3_${ik}$, m ) )*h( k+3, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_zgemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,&
                               jcol ), ldh, czero, wh,ldwh )
                    call stdlib${ii}$_zlacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_zgemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, czero, wv, ldwv )
                    call stdlib${ii}$_zlacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_zgemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, &
                                 u( k1, k1 ),ldu, czero, wv, ldwv )
                       call stdlib${ii}$_zlacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_zlaqr5

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqr5( wantt, wantz, kacc22, n, ktop, kbot, nshfts, s,h, ldh, iloz, &
     !! ZLAQR5:, called by ZLAQR0, performs a
     !! single small-bulge multi-shift QR sweep.
               ihiz, z, ldz, v, ldv, u, ldu, nv,wv, ldwv, nh, wh, ldwh )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: ihiz, iloz, kacc22, kbot, ktop, ldh, ldu, ldv, ldwh, ldwv, &
                     ldz, n, nh, nshfts, nv
           logical(lk), intent(in) :: wantt, wantz
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: h(ldh,*), s(*), z(ldz,*)
           complex(${ck}$), intent(out) :: u(ldu,*), v(ldv,*), wh(ldwh,*), wv(ldwv,*)
        ! ================================================================
           ! Parameters 
           
           ! Local Scalars 
           complex(${ck}$) :: alpha, beta, cdum, refsum
           real(${ck}$) :: h11, h12, h21, h22, safmax, safmin, scl, smlnum, tst1, tst2, ulp
           integer(${ik}$) :: i2, i4, incol, j, jbot, jcol, jlen, jrow, jtop, k, k1, kdu, kms, krcol,&
                      m, m22, mbot, mtop, nbmps, ndcol, ns, nu
           logical(lk) :: accum, bmp22
           ! Intrinsic Functions 
           ! Local Arrays 
           complex(${ck}$) :: vt(3_${ik}$)
           ! Statement Functions 
           real(${ck}$) :: cabs1
           ! Statement Function Definitions 
           cabs1( cdum ) = abs( real( cdum,KIND=${ck}$) ) + abs( aimag( cdum ) )
           ! Executable Statements 
           ! ==== if there are no shifts, then there is nothing to do. ====
           if( nshfts<2 )return
           ! ==== if the active block is empty or 1-by-1, then there
           ! .    is nothing to do. ====
           if( ktop>=kbot )return
           ! ==== nshfts is supposed to be even, but if it is odd,
           ! .    then simply reduce it by cone.  ====
           ns = nshfts - mod( nshfts, 2_${ik}$ )
           ! ==== machine constants for deflation ====
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one / safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${ck}$) / ulp )
           ! ==== use accumulated reflections to update far-from-diagonal
           ! .    entries ? ====
           accum = ( kacc22==1_${ik}$ ) .or. ( kacc22==2_${ik}$ )
           ! ==== clear trash ====
           if( ktop+2<=kbot )h( ktop+2, ktop ) = czero
           ! ==== nbmps = number of 2-shift bulges in the chain ====
           nbmps = ns / 2_${ik}$
           ! ==== kdu = width of slab ====
           kdu = 4_${ik}$*nbmps
           ! ==== create and chase chains of nbmps bulges ====
           loop_180: do incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
              ! jtop = index from which updates from the right start.
              if( accum ) then
                 jtop = max( ktop, incol )
              else if( wantt ) then
                 jtop = 1_${ik}$
              else
                 jtop = ktop
              end if
              ndcol = incol + kdu
              if( accum )call stdlib${ii}$_${ci}$laset( 'ALL', kdu, kdu, czero, cone, u, ldu )
              ! ==== near-the-diagonal bulge chase.  the following loop
              ! .    performs the near-the-diagonal part of a small bulge
              ! .    multi-shift qr sweep.  each 4*nbmps column diagonal
              ! .    chunk extends from column incol to column ndcol
              ! .    (including both column incol and column ndcol). the
              ! .    following loop chases a 2*nbmps+1 column long chain of
              ! .    nbmps bulges 2*nbmps columns to the right.  (incol
              ! .    may be less than ktop and and ndcol may be greater than
              ! .    kbot indicating phantom columns from which to chase
              ! .    bulges before they are actually introduced or to which
              ! .    to chase bulges beyond column kbot.)  ====
              loop_145: do krcol = incol, min( incol+2*nbmps-1, kbot-2 )
                 ! ==== bulges number mtop to mbot are active double implicit
                 ! .    shift bulges.  there may or may not also be small
                 ! .    2-by-2 bulge, if there is room.  the inactive bulges
                 ! .    (if any) must wait until the active bulges have moved
                 ! .    down the diagonal to make room.  the phantom matrix
                 ! .    paradigm described above helps keep track.  ====
                 mtop = max( 1_${ik}$, ( ktop-krcol ) / 2_${ik}$+1 )
                 mbot = min( nbmps, ( kbot-krcol-1 ) / 2_${ik}$ )
                 m22 = mbot + 1_${ik}$
                 bmp22 = ( mbot<nbmps ) .and. ( krcol+2*( m22-1 ) )==( kbot-2 )
                 ! ==== generate reflections to chase the chain right
                 ! .    cone column.  (the minimum value of k is ktop-1.) ====
                 if ( bmp22 ) then
                    ! ==== special case: 2-by-2 reflection at bottom treated
                    ! .    separately ====
                    k = krcol + 2_${ik}$*( m22-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_${ci}$laqr1( 2_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m22-1 ),s( 2_${ik}$*m22 ), v( 1_${ik}$, &
                                 m22 ) )
                       beta = v( 1_${ik}$, m22 )
                       call stdlib${ii}$_${ci}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                    else
                       beta = h( k+1, k )
                       v( 2_${ik}$, m22 ) = h( k+2, k )
                       call stdlib${ii}$_${ci}$larfg( 2_${ik}$, beta, v( 2_${ik}$, m22 ), 1_${ik}$, v( 1_${ik}$, m22 ) )
                       h( k+1, k ) = beta
                       h( k+2, k ) = czero
                    end if
                    ! ==== perform update from right within
                    ! .    computational window. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m22 )*( h( j, k+1 )+v( 2_${ik}$, m22 )*h( j, k+2 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                    end do
                    ! ==== perform update from left within
                    ! .    computational window. ====
                    if( accum ) then
                       jbot = min( ndcol, kbot )
                    else if( wantt ) then
                       jbot = n
                    else
                       jbot = kbot
                    end if
                    do j = k+1, jbot
                       refsum = conjg( v( 1_${ik}$, m22 ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m22 ) )*h( k+2, j &
                                 ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m22 )
                    end do
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k>=ktop ) then
                       if( h( k+1, k )/=czero ) then
                          tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                          if( tst1==zero ) then
                             if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                             if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                             if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                             if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                             if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                             if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                          end if
                          if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) ) then
                             h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                             h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                       
                             scl = h11 + h12
                             tst2 = h22*( h11 / scl )
                             if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( &
                                       k+1, k ) = czero
                          end if
                       end if
                    end if
                    ! ==== accumulate orthogonal transformations. ====
                    if( accum ) then
                       kms = k - incol
                       do j = max( 1, ktop-incol ), kdu
                          refsum = v( 1_${ik}$, m22 )*( u( j, kms+1 )+v( 2_${ik}$, m22 )*u( j, kms+2 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    else if( wantz ) then
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m22 )*( z( j, k+1 )+v( 2_${ik}$, m22 )*z( j, k+2 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m22 ) )
                       end do
                    end if
                 end if
                 ! ==== normal case: chain of 3-by-3 reflections ====
                 loop_80: do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    if( k==ktop-1 ) then
                       call stdlib${ii}$_${ci}$laqr1( 3_${ik}$, h( ktop, ktop ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), v( 1_${ik}$, m )&
                                  )
                       alpha = v( 1_${ik}$, m )
                       call stdlib${ii}$_${ci}$larfg( 3_${ik}$, alpha, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                    else
                       ! ==== perform delayed transformation of row below
                       ! .    mth bulge. exploit fact that first two elements
                       ! .    of row are actually czero. ====
                       refsum = v( 1_${ik}$, m )*v( 3_${ik}$, m )*h( k+3, k+2 )
                       h( k+3, k   ) = -refsum
                       h( k+3, k+1 ) = -refsum*conjg( v( 2_${ik}$, m ) )
                       h( k+3, k+2 ) = h( k+3, k+2 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       ! ==== calculate reflection to move
                       ! .    mth bulge cone step. ====
                       beta      = h( k+1, k )
                       v( 2_${ik}$, m ) = h( k+2, k )
                       v( 3_${ik}$, m ) = h( k+3, k )
                       call stdlib${ii}$_${ci}$larfg( 3_${ik}$, beta, v( 2_${ik}$, m ), 1_${ik}$, v( 1_${ik}$, m ) )
                       ! ==== a bulge may collapse because of vigilant
                       ! .    deflation or destructive underflow.  in the
                       ! .    underflow case, try the two-small-subdiagonals
                       ! .    trick to try to reinflate the bulge.  ====
                       if( h( k+3, k )/=czero .or. h( k+3, k+1 )/=czero .or. h( k+3, k+2 )==czero &
                                 ) then
                          ! ==== typical case: not collapsed (yet). ====
                          h( k+1, k ) = beta
                          h( k+2, k ) = czero
                          h( k+3, k ) = czero
                       else
                          ! ==== atypical case: collapsed.  attempt to
                          ! .    reintroduce ignoring h(k+1,k) and h(k+2,k).
                          ! .    if the fill resulting from the new
                          ! .    reflector is too large, then abandon it.
                          ! .    otherwise, use the new cone. ====
                          call stdlib${ii}$_${ci}$laqr1( 3_${ik}$, h( k+1, k+1 ), ldh, s( 2_${ik}$*m-1 ),s( 2_${ik}$*m ), vt )
                                    
                          alpha = vt( 1_${ik}$ )
                          call stdlib${ii}$_${ci}$larfg( 3_${ik}$, alpha, vt( 2_${ik}$ ), 1_${ik}$, vt( 1_${ik}$ ) )
                          refsum = conjg( vt( 1_${ik}$ ) )*( h( k+1, k )+conjg( vt( 2_${ik}$ ) )*h( k+2, k ) )
                                    
                          if( cabs1( h( k+2, k )-refsum*vt( 2_${ik}$ ) )+cabs1( refsum*vt( 3_${ik}$ ) )>ulp*( &
                          cabs1( h( k, k ) )+cabs1( h( k+1,k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) &
                                    then
                             ! ==== starting a new bulge here would
                             ! .    create non-negligible fill.  use
                             ! .    the old cone with trepidation. ====
                             h( k+1, k ) = beta
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                          else
                             ! ==== starting a new bulge here would
                             ! .    create only negligible fill.
                             ! .    replace the old reflector with
                             ! .    the new cone. ====
                             h( k+1, k ) = h( k+1, k ) - refsum
                             h( k+2, k ) = czero
                             h( k+3, k ) = czero
                             v( 1_${ik}$, m ) = vt( 1_${ik}$ )
                             v( 2_${ik}$, m ) = vt( 2_${ik}$ )
                             v( 3_${ik}$, m ) = vt( 3_${ik}$ )
                          end if
                       end if
                    end if
                    ! ====  apply reflection from the right and
                    ! .     the first column of update from the left.
                    ! .     these updates are required for the vigilant
                    ! .     deflation check. we still delay most of the
                    ! .     updates from the left for efficiency. ====
                    do j = jtop, min( kbot, k+3 )
                       refsum = v( 1_${ik}$, m )*( h( j, k+1 )+v( 2_${ik}$, m )*h( j, k+2 )+v( 3_${ik}$, m )*h( j, k+3 &
                                 ) )
                       h( j, k+1 ) = h( j, k+1 ) - refsum
                       h( j, k+2 ) = h( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                       h( j, k+3 ) = h( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                    end do
                    ! ==== perform update from left for subsequent
                    ! .    column. ====
                    refsum =  conjg( v( 1_${ik}$, m ) )*( h( k+1, k+1 )+conjg( v( 2_${ik}$, m ) )*h( k+2, k+1 )+&
                              conjg( v( 3_${ik}$, m ) )*h( k+3, k+1 ) )
                    h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
                    h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2_${ik}$, m )
                    h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3_${ik}$, m )
                    ! ==== the following convergence test requires that
                    ! .    the tradition small-compared-to-nearby-diagonals
                    ! .    criterion and the ahues
                    ! .    criteria both be satisfied.  the latter improves
                    ! .    accuracy in some examples. falling back on an
                    ! .    alternate convergence criterion when tst1 or tst2
                    ! .    is czero (as done here) is traditional but probably
                    ! .    unnecessary. ====
                    if( k<ktop)cycle
                    if( h( k+1, k )/=czero ) then
                       tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
                       if( tst1==zero ) then
                          if( k>=ktop+1 )tst1 = tst1 + cabs1( h( k, k-1 ) )
                          if( k>=ktop+2 )tst1 = tst1 + cabs1( h( k, k-2 ) )
                          if( k>=ktop+3 )tst1 = tst1 + cabs1( h( k, k-3 ) )
                          if( k<=kbot-2 )tst1 = tst1 + cabs1( h( k+2, k+1 ) )
                          if( k<=kbot-3 )tst1 = tst1 + cabs1( h( k+3, k+1 ) )
                          if( k<=kbot-4 )tst1 = tst1 + cabs1( h( k+4, k+1 ) )
                       end if
                       if( cabs1( h( k+1, k ) )<=max( smlnum, ulp*tst1 ) )then
                          h12 = max( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h21 = min( cabs1( h( k+1, k ) ),cabs1( h( k, k+1 ) ) )
                          h11 = max( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          h22 = min( cabs1( h( k+1, k+1 ) ),cabs1( h( k, k )-h( k+1, k+1 ) ) )
                                    
                          scl = h11 + h12
                          tst2 = h22*( h11 / scl )
                          if( tst2==zero .or. h21*( h12 / scl )<=max( smlnum, ulp*tst2 ) )h( k+1,&
                                     k ) = czero
                       end if
                    end if
                 end do loop_80
                 ! ==== multiply h by reflections from the left ====
                 if( accum ) then
                    jbot = min( ndcol, kbot )
                 else if( wantt ) then
                    jbot = n
                 else
                    jbot = kbot
                 end if
                 do m = mbot, mtop, -1
                    k = krcol + 2_${ik}$*( m-1 )
                    do j = max( ktop, krcol + 2*m ), jbot
                       refsum = conjg( v( 1_${ik}$, m ) )*( h( k+1, j )+conjg( v( 2_${ik}$, m ) )*h( k+2, j )+&
                                 conjg( v( 3_${ik}$, m ) )*h( k+3, j ) )
                       h( k+1, j ) = h( k+1, j ) - refsum
                       h( k+2, j ) = h( k+2, j ) - refsum*v( 2_${ik}$, m )
                       h( k+3, j ) = h( k+3, j ) - refsum*v( 3_${ik}$, m )
                    end do
                 end do
                 ! ==== accumulate orthogonal transformations. ====
                 if( accum ) then
                    ! ==== accumulate u. (if needed, update z later
                    ! .    with an efficient matrix-matrix
                    ! .    multiply.) ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       kms = k - incol
                       i2 = max( 1_${ik}$, ktop-incol )
                       i2 = max( i2, kms-(krcol-incol)+1_${ik}$ )
                       i4 = min( kdu, krcol + 2_${ik}$*( mbot-1 ) - incol + 5_${ik}$ )
                       do j = i2, i4
                          refsum = v( 1_${ik}$, m )*( u( j, kms+1 )+v( 2_${ik}$, m )*u( j, kms+2 )+v( 3_${ik}$, m )*u( &
                                    j, kms+3 ) )
                          u( j, kms+1 ) = u( j, kms+1 ) - refsum
                          u( j, kms+2 ) = u( j, kms+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          u( j, kms+3 ) = u( j, kms+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 else if( wantz ) then
                    ! ==== u is not accumulated, so update z
                    ! .    now by multiplying by reflections
                    ! .    from the right. ====
                    do m = mbot, mtop, -1
                       k = krcol + 2_${ik}$*( m-1 )
                       do j = iloz, ihiz
                          refsum = v( 1_${ik}$, m )*( z( j, k+1 )+v( 2_${ik}$, m )*z( j, k+2 )+v( 3_${ik}$, m )*z( j, &
                                    k+3 ) )
                          z( j, k+1 ) = z( j, k+1 ) - refsum
                          z( j, k+2 ) = z( j, k+2 ) -refsum*conjg( v( 2_${ik}$, m ) )
                          z( j, k+3 ) = z( j, k+3 ) -refsum*conjg( v( 3_${ik}$, m ) )
                       end do
                    end do
                 end if
                 ! ==== end of near-the-diagonal bulge chase. ====
              end do loop_145
              ! ==== use u (if accumulated) to update far-from-diagonal
              ! .    entries in h.  if required, use u to update z as
              ! .    well. ====
              if( accum ) then
                 if( wantt ) then
                    jtop = 1_${ik}$
                    jbot = n
                 else
                    jtop = ktop
                    jbot = kbot
                 end if
                 k1 = max( 1_${ik}$, ktop-incol )
                 nu = ( kdu-max( 0_${ik}$, ndcol-kbot ) ) - k1 + 1_${ik}$
                 ! ==== horizontal multiply ====
                 do jcol = min( ndcol, kbot ) + 1, jbot, nh
                    jlen = min( nh, jbot-jcol+1 )
                    call stdlib${ii}$_${ci}$gemm( 'C', 'N', nu, jlen, nu, cone, u( k1, k1 ),ldu, h( incol+k1,&
                               jcol ), ldh, czero, wh,ldwh )
                    call stdlib${ii}$_${ci}$lacpy( 'ALL', nu, jlen, wh, ldwh,h( incol+k1, jcol ), ldh )
                              
                 end do
                 ! ==== vertical multiply ====
                 do jrow = jtop, max( ktop, incol ) - 1, nv
                    jlen = min( nv, max( ktop, incol )-jrow )
                    call stdlib${ii}$_${ci}$gemm( 'N', 'N', jlen, nu, nu, cone,h( jrow, incol+k1 ), ldh, u( &
                              k1, k1 ),ldu, czero, wv, ldwv )
                    call stdlib${ii}$_${ci}$lacpy( 'ALL', jlen, nu, wv, ldwv,h( jrow, incol+k1 ), ldh )
                              
                 end do
                 ! ==== z multiply (also vertical) ====
                 if( wantz ) then
                    do jrow = iloz, ihiz, nv
                       jlen = min( nv, ihiz-jrow+1 )
                       call stdlib${ii}$_${ci}$gemm( 'N', 'N', jlen, nu, nu, cone,z( jrow, incol+k1 ), ldz, &
                                 u( k1, k1 ),ldu, czero, wv, ldwv )
                       call stdlib${ii}$_${ci}$lacpy( 'ALL', jlen, nu, wv, ldwv,z( jrow, incol+k1 ), ldz )
                                 
                    end do
                 end if
              end if
           end do loop_180
     end subroutine stdlib${ii}$_${ci}$laqr5

#:endif
#:endfor



     recursive module subroutine stdlib${ii}$_slaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, &
     !! SLAQZ0 computes the eigenvalues of a real matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a real matrix pair (A,B):
     !! A = Q1*H*Z1**T,  B = Q1*T*Z1**T,
     !! as computed by SGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**T,  T = Q*P*Z**T,
     !! where Q and Z are orthogonal matrices, P is an upper triangular
     !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2
     !! diagonal blocks.
     !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair
     !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of
     !! eigenvalues.
     !! Additionally, the 2-by-2 upper triangular diagonal blocks of P
     !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal
     !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0,
     !! P(j,j) > 0, and P(j+1,j+1) > 0.
     !! Optionally, the orthogonal matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! orthogonal matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**T,  B = (Q1*Q)*P*(Z1*Z)**T.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Real eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               alphai, beta,q, ldq, z, ldz, work, lwork, rec,info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(&
                      * ), alphai( * ), beta( * ), work( * )
           ! ================================================================
           ! local scalars
           real(sp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost, i
           logical(lk) :: ilschur, ilq, ilz
           character(len=3_${ik}$) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=sp)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'SLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=sp)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_shgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,&
                         beta, q, ldq, z, ldz, work,lwork, info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_slaqz3
           nw = max( nwr, nmin )
           call stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_slaqz4
           call stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, &
                     lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=sp)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, q, ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_slaset( 'FULL', n, n, zero, one, z, ldz )
           ! get machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp)/ulp )
           istart = ilo
           istop = ihi
           maxit = 3_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+&
                        abs( a( istop-2,istop-2 ) ) ) ) ) then
                 a( istop-1, istop-2 ) = zero
                 istop = istop-2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+&
                        abs( a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = zero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              ! check deflations at the start
              if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 &
                        ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then
                 a( istart+2, istart+1 ) = zero
                 istart = istart+2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )&
                         )+abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = zero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = zero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 temp = zero
                 if( k < istop ) then
                    temp = temp+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    temp = temp+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_slartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = zero
                       call stdlib${ii}$_srot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_srot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_srot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_slartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = zero
                          call stdlib${ii}$_srot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_srot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_srot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 )
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_slartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = zero
                       call stdlib${ii}$_srot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_srot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_srot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 )
                                    
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = zero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using qz_small because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),&
                         nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info )
              if ( n_deflated > 0_${ik}$ ) then
                 istop = istop-n_deflated
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_deflated-n_undeflated+1
              ! shuffle shifts to put double shifts in front
              ! this ensures that we don't split up a double shift
              do i = shiftpos, shiftpos+n_undeflated-1, 2
                 if( alphai( i )/=-alphai( i+1 ) ) then
                    swap = alphar( i )
                    alphar( i ) = alphar( i+1 )
                    alphar( i+1 ) = alphar( i+2 )
                    alphar( i+2 ) = swap
                    swap = alphai( i )
                    alphai( i ) = alphai( i+1 )
                    alphai( i+1 ) = alphai( i+2 )
                    alphai( i+2 ) = swap
                    swap = beta( i )
                    beta( i ) = beta( i+1 )
                    beta( i+1 ) = beta( i+2 )
                    beta( i+2 ) = swap
                 end if
              end do
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+one/( safmin*real( maxit,KIND=sp) )
                 end if
                 alphar( shiftpos ) = one
                 alphar( shiftpos+1 ) = zero
                 alphai( shiftpos ) = zero
                 alphai( shiftpos+1 ) = zero
                 beta( shiftpos ) = eshift
                 beta( shiftpos+1 ) = eshift
                 ns = 2_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( &
              shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,&
              work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,&
                        sweep_info )
           end do
           ! call stdlib${ii}$_shgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_shgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 continue 
           call stdlib${ii}$_shgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, &
                  beta, q, ldq, z, ldz, work, lwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_slaqz0

     recursive module subroutine stdlib${ii}$_dlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, &
     !! DLAQZ0 computes the eigenvalues of a real matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a real matrix pair (A,B):
     !! A = Q1*H*Z1**T,  B = Q1*T*Z1**T,
     !! as computed by DGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**T,  T = Q*P*Z**T,
     !! where Q and Z are orthogonal matrices, P is an upper triangular
     !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2
     !! diagonal blocks.
     !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair
     !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of
     !! eigenvalues.
     !! Additionally, the 2-by-2 upper triangular diagonal blocks of P
     !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal
     !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0,
     !! P(j,j) > 0, and P(j+1,j+1) > 0.
     !! Optionally, the orthogonal matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! orthogonal matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**T,  B = (Q1*Q)*P*(Z1*Z)**T.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Real eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               alphai, beta,q, ldq, z, ldz, work, lwork, rec,info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(&
                      * ),alphai( * ), beta( * ), work( * )
           ! ================================================================
           ! local scalars
           real(dp) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost, i
           logical(lk) :: ilschur, ilq, ilz
           character(len=3_${ik}$) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=dp)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=dp)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_dhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,&
                         beta, q, ldq, z, ldz, work,lwork, info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_dlaqz3
           nw = max( nwr, nmin )
           call stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_deflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_dlaqz4
           call stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, &
                     lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=dp)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, q, ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_dlaset( 'FULL', n, n, zero, one, z, ldz )
           ! get machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp)/ulp )
           istart = ilo
           istop = ihi
           maxit = 3_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+&
                        abs( a( istop-2,istop-2 ) ) ) ) ) then
                 a( istop-1, istop-2 ) = zero
                 istop = istop-2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+&
                        abs( a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = zero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              ! check deflations at the start
              if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 &
                        ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then
                 a( istart+2, istart+1 ) = zero
                 istart = istart+2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )&
                         )+abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = zero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = zero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 temp = zero
                 if( k < istop ) then
                    temp = temp+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    temp = temp+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_dlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = zero
                       call stdlib${ii}$_drot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_drot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_drot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_dlartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = zero
                          call stdlib${ii}$_drot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_drot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_drot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 )
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_dlartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = zero
                       call stdlib${ii}$_drot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_drot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_drot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 )
                                    
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = zero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using stdlib${ii}$_dhgeqz because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_deflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),&
                         nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info )
              if ( n_deflated > 0_${ik}$ ) then
                 istop = istop-n_deflated
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_deflated-n_undeflated+1
              ! shuffle shifts to put double shifts in front
              ! this ensures that we don't split up a double shift
              do i = shiftpos, shiftpos+n_undeflated-1, 2
                 if( alphai( i )/=-alphai( i+1 ) ) then
                    swap = alphar( i )
                    alphar( i ) = alphar( i+1 )
                    alphar( i+1 ) = alphar( i+2 )
                    alphar( i+2 ) = swap
                    swap = alphai( i )
                    alphai( i ) = alphai( i+1 )
                    alphai( i+1 ) = alphai( i+2 )
                    alphai( i+2 ) = swap
                    swap = beta( i )
                    beta( i ) = beta( i+1 )
                    beta( i+1 ) = beta( i+2 )
                    beta( i+2 ) = swap
                 end if
              end do
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+one/( safmin*real( maxit,KIND=dp) )
                 end if
                 alphar( shiftpos ) = one
                 alphar( shiftpos+1 ) = zero
                 alphai( shiftpos ) = zero
                 alphai( shiftpos+1 ) = zero
                 beta( shiftpos ) = eshift
                 beta( shiftpos+1 ) = eshift
                 ns = 2_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( &
              shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,&
              work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,&
                        sweep_info )
           end do
           ! call stdlib${ii}$_dhgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_dhgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 call stdlib${ii}$_dhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, &
                  beta, q, ldq, z, ldz, work, lwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_dlaqz0

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     recursive module subroutine stdlib${ii}$_${ri}$laqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alphar, &
     !! DLAQZ0: computes the eigenvalues of a real matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a real matrix pair (A,B):
     !! A = Q1*H*Z1**T,  B = Q1*T*Z1**T,
     !! as computed by DGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**T,  T = Q*P*Z**T,
     !! where Q and Z are orthogonal matrices, P is an upper triangular
     !! matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2
     !! diagonal blocks.
     !! The 1-by-1 blocks correspond to real eigenvalues of the matrix pair
     !! (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of
     !! eigenvalues.
     !! Additionally, the 2-by-2 upper triangular diagonal blocks of P
     !! corresponding to 2-by-2 blocks of S are reduced to positive diagonal
     !! form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0,
     !! P(j,j) > 0, and P(j+1,j+1) > 0.
     !! Optionally, the orthogonal matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! orthogonal matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the orthogonal factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**T,  B = (Q1*Q)*P*(Z1*Z)**T.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Real eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               alphai, beta,q, ldq, z, ldz, work, lwork, rec,info )
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(&
                      * ),alphai( * ), beta( * ), work( * )
           ! ================================================================
           ! local scalars
           real(${rk}$) :: smlnum, ulp, eshift, safmin, safmax, c1, s1, temp, swap
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost, i
           logical(lk) :: ilschur, ilq, ilz
           character(len=3_${ik}$) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=${rk}$)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'DLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=${rk}$)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_${ri}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai,&
                         beta, q, ldq, z, ldz, work,lwork, info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_${ri}$laqz3
           nw = max( nwr, nmin )
           call stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_qeflated, alphar,alphai, beta, work, nw, work, nw, work, -1_${ik}$, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_${ri}$laqz4
           call stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alphar,alphai, beta, a, &
                     lda, b, ldb, q, ldq, z, ldz, work,nbr, work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=${rk}$)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, q, ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_${ri}$laset( 'FULL', n, n, zero, one, z, ldz )
           ! get machine constants
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${rk}$)/ulp )
           istart = ilo
           istop = ihi
           maxit = 3_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop-1, istop-2 ) ) <= max( smlnum,ulp*( abs( a( istop-1, istop-1 ) )+&
                        abs( a( istop-2,istop-2 ) ) ) ) ) then
                 a( istop-1, istop-2 ) = zero
                 istop = istop-2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+&
                        abs( a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = zero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              ! check deflations at the start
              if ( abs( a( istart+2, istart+1 ) ) <= max( smlnum,ulp*( abs( a( istart+1, istart+1 &
                        ) )+abs( a( istart+2,istart+2 ) ) ) ) ) then
                 a( istart+2, istart+1 ) = zero
                 istart = istart+2
                 ld = 0_${ik}$
                 eshift = zero
              else if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart )&
                         )+abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = zero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = zero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 temp = zero
                 if( k < istop ) then
                    temp = temp+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    temp = temp+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*temp ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_${ri}$lartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = zero
                       call stdlib${ii}$_${ri}$rot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_${ri}$rot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_${ri}$rot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_${ri}$lartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = zero
                          call stdlib${ii}$_${ri}$rot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_${ri}$rot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, s1 )
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_${ri}$lartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = zero
                       call stdlib${ii}$_${ri}$rot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_${ri}$rot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, s1 )
                                    
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = zero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using stdlib${ii}$_${ri}$hgeqz because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_qeflated,alphar, alphai, beta, work, nw, work( nw**2_${ik}$+1 ),&
                         nw, work( 2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rec,aed_info )
              if ( n_qeflated > 0_${ik}$ ) then
                 istop = istop-n_qeflated
                 ld = 0_${ik}$
                 eshift = zero
              end if
              if ( 100_${ik}$*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_qeflated-n_undeflated+1
              ! shuffle shifts to put double shifts in front
              ! this ensures that we don't split up a double shift
              do i = shiftpos, shiftpos+n_undeflated-1, 2
                 if( alphai( i )/=-alphai( i+1 ) ) then
                    swap = alphar( i )
                    alphar( i ) = alphar( i+1 )
                    alphar( i+1 ) = alphar( i+2 )
                    alphar( i+2 ) = swap
                    swap = alphai( i )
                    alphai( i ) = alphai( i+1 )
                    alphai( i+1 ) = alphai( i+2 )
                    alphai( i+2 ) = swap
                    swap = beta( i )
                    beta( i ) = beta( i+1 )
                    beta( i+1 ) = beta( i+2 )
                    beta( i+2 ) = swap
                 end if
              end do
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=${rk}$)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+one/( safmin*real( maxit,KIND=${rk}$) )
                 end if
                 alphar( shiftpos ) = one
                 alphar( shiftpos+1 ) = zero
                 alphai( shiftpos ) = zero
                 alphai( shiftpos+1 ) = zero
                 beta( shiftpos ) = eshift
                 beta( shiftpos+1 ) = eshift
                 ns = 2_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alphar( &
              shiftpos ), alphai( shiftpos ),beta( shiftpos ), a, lda, b, ldb, q, ldq, z, ldz,&
              work, nblock, work( nblock**2_${ik}$+1 ), nblock,work( 2_${ik}$*nblock**2_${ik}$+1 ), lwork-2*nblock**2_${ik}$,&
                        sweep_info )
           end do
           ! call stdlib${ii}$_${ri}$hgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_${ri}$hgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 continue
           call stdlib${ii}$_${ri}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alphar, alphai, &
                  beta, q, ldq, z, ldz, work, lwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_${ri}$laqz0

#:endif
#:endfor

     recursive module subroutine stdlib${ii}$_claqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, &
     !! CLAQZ0 computes the eigenvalues of a matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a matrix pair (A,B):
     !! A = Q1*H*Z1**H,  B = Q1*T*Z1**H,
     !! as computed by CGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**H,  T = Q*P*Z**H,
     !! where Q and Z are unitary matrices, P and S are an upper triangular
     !! matrices.
     !! Optionally, the unitary matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! unitary matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the unitary matrices from CGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the unitary factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**H,  B = (Q1*Q)*P*(Z1*Z)**H.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               beta, q, ldq, z,ldz, work, lwork, rwork, rec,info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), &
                     alpha( * ), beta( * ), work( * )
           real(sp), intent( out ) :: rwork( * )
           
           ! ================================================================
           ! local scalars
           real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr
           complex(sp) :: eshift, s1, temp
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost
           logical(lk) :: ilschur, ilq, ilz
           character(len=3) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=sp)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'CLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=sp)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_chgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,&
                         ldq, z, ldz, work, lwork, rwork,info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_claqz2
           nw = max( nwr, nmin )
           call stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_claqz3
           call stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, &
                     ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=sp)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, q,ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_claset( 'FULL', n, n, czero, cone, z,ldz )
           ! get machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp)/ulp )
           istart = ilo
           istop = ihi
           maxit = 30_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( &
                        a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = czero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              ! check deflations at the start
              if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+&
                        abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = czero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = czero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 tempr = zero
                 if( k < istop ) then
                    tempr = tempr+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    tempr = tempr+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_clartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = czero
                       call stdlib${ii}$_crot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_crot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_crot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_clartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = czero
                          call stdlib${ii}$_crot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_crot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_crot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) )
                                       
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_clartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = czero
                       call stdlib${ii}$_crot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_crot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_crot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(&
                                     s1 ) )
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = czero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using stdlib${ii}$_chgeqz because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( &
                         2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info )
              if ( n_deflated > 0_${ik}$ ) then
                 istop = istop-n_deflated
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_deflated-n_undeflated+1
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=sp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+cone/( safmin*real( maxit,KIND=sp) )
                 end if
                 alpha( shiftpos ) = cone
                 beta( shiftpos ) = eshift
                 ns = 1_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( &
              shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( &
                        nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info )
           end do
           ! call stdlib${ii}$_chgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_chgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 call stdlib${ii}$_chgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, &
                  ldq, z, ldz, work, lwork, rwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_claqz0

     recursive module subroutine stdlib${ii}$_zlaqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, &
     !! ZLAQZ0 computes the eigenvalues of a real matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a real matrix pair (A,B):
     !! A = Q1*H*Z1**H,  B = Q1*T*Z1**H,
     !! as computed by ZGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**H,  T = Q*P*Z**H,
     !! where Q and Z are unitary matrices, P and S are an upper triangular
     !! matrices.
     !! Optionally, the unitary matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! unitary matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the unitary factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**H,  B = (Q1*Q)*P*(Z1*Z)**H.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               beta, q, ldq, z,ldz, work, lwork, rwork, rec,info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), &
                     alpha( * ), beta( * ), work( * )
           real(dp), intent( out ) :: rwork( * )
           
           ! ================================================================
           ! local scalars
           real(dp) :: smlnum, ulp, safmin, safmax, c1, tempr
           complex(dp) :: eshift, s1, temp
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_deflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost
           logical(lk) :: ilschur, ilq, ilz
           character(len=3) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=dp)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=dp)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_zhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,&
                         ldq, z, ldz, work, lwork, rwork,info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_zlaqz2
           nw = max( nwr, nmin )
           call stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_deflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_zlaqz3
           call stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, &
                     ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=dp)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, q,ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_zlaset( 'FULL', n, n, czero, cone, z,ldz )
           ! get machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp)/ulp )
           istart = ilo
           istop = ihi
           maxit = 30_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( &
                        a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = czero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              ! check deflations at the start
              if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+&
                        abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = czero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = czero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 tempr = zero
                 if( k < istop ) then
                    tempr = tempr+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    tempr = tempr+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_zlartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = czero
                       call stdlib${ii}$_zrot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_zrot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_zrot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_zlartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = czero
                          call stdlib${ii}$_zrot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_zrot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_zrot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) )
                                       
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_zlartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = czero
                       call stdlib${ii}$_zrot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_zrot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_zrot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(&
                                     s1 ) )
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = czero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using qz_small because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_deflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( &
                         2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info )
              if ( n_deflated > 0_${ik}$ ) then
                 istop = istop-n_deflated
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( 100_${ik}$*n_deflated > nibble*( n_deflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_deflated-n_undeflated+1
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=dp)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+cone/( safmin*real( maxit,KIND=dp) )
                 end if
                 alpha( shiftpos ) = cone
                 beta( shiftpos ) = eshift
                 ns = 1_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( &
              shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( &
                        nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info )
           end do
           ! call stdlib${ii}$_zhgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_zhgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 call stdlib${ii}$_zhgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, &
                  ldq, z, ldz, work, lwork, rwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_zlaqz0

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     recursive module subroutine stdlib${ii}$_${ci}$laqz0( wants, wantq, wantz, n, ilo, ihi, a,lda, b, ldb, alpha, &
     !! ZLAQZ0: computes the eigenvalues of a real matrix pair (H,T),
     !! where H is an upper Hessenberg matrix and T is upper triangular,
     !! using the double-shift QZ method.
     !! Matrix pairs of this type are produced by the reduction to
     !! generalized upper Hessenberg form of a real matrix pair (A,B):
     !! A = Q1*H*Z1**H,  B = Q1*T*Z1**H,
     !! as computed by ZGGHRD.
     !! If JOB='S', then the Hessenberg-triangular pair (H,T) is
     !! also reduced to generalized Schur form,
     !! H = Q*S*Z**H,  T = Q*P*Z**H,
     !! where Q and Z are unitary matrices, P and S are an upper triangular
     !! matrices.
     !! Optionally, the unitary matrix Q from the generalized Schur
     !! factorization may be postmultiplied into an input matrix Q1, and the
     !! unitary matrix Z may be postmultiplied into an input matrix Z1.
     !! If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced
     !! the matrix pair (A,B) to generalized upper Hessenberg form, then the
     !! output matrices Q1*Q and Z1*Z are the unitary factors from the
     !! generalized Schur factorization of (A,B):
     !! A = (Q1*Q)*S*(Z1*Z)**H,  B = (Q1*Q)*P*(Z1*Z)**H.
     !! To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently,
     !! of (A,B)) are computed as a pair of values (alpha,beta), where alpha is
     !! complex and beta real.
     !! If beta is nonzero, lambda = alpha / beta is an eigenvalue of the
     !! generalized nonsymmetric eigenvalue problem (GNEP)
     !! A*x = lambda*B*x
     !! and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the
     !! alternate form of the GNEP
     !! mu*A*y = B*y.
     !! Eigenvalues can be read directly from the generalized Schur
     !! form:
     !! alpha = S(i,i), beta = P(i,i).
     !! Ref: C.B. Moler
     !! Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
     !! pp. 241--256.
     !! Ref: B. Kagstrom, D. Kressner, "Multishift Variants of the QZ
     !! Algorithm with Aggressive Early Deflation", SIAM J. Numer.
     !! Anal., 29(2006), pp. 199--227.
     !! Ref: T. Steel, D. Camps, K. Meerbergen, R. Vandebril "A multishift,
     !! multipole rational QZ method with agressive early deflation"
               beta, q, ldq, z,ldz, work, lwork, rwork, rec,info )
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           character, intent( in ) :: wants, wantq, wantz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,rec
           integer(${ik}$), intent( out ) :: info
           complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), &
                     alpha( * ), beta( * ), work( * )
           real(${ck}$), intent( out ) :: rwork( * )
           
           ! ================================================================
           ! local scalars
           real(${ck}$) :: smlnum, ulp, safmin, safmax, c1, tempr
           complex(${ck}$) :: eshift, s1, temp
           integer(${ik}$) :: istart, istop, iiter, maxit, istart2, k, ld, nshifts, nblock, nw, nmin,&
            nibble, n_undeflated, n_qeflated, ns, sweep_info, shiftpos, lworkreq, k2, istartm, &
            istopm, iwants, iwantq, iwantz, norm_info, aed_info, nwr, nbr, nsr, itemp1, itemp2, &
                      rcost
           logical(lk) :: ilschur, ilq, ilz
           character(len=3) :: jbcmpz
           if( stdlib_lsame( wants, 'E' ) ) then
              ilschur = .false.
              iwants = 1_${ik}$
           else if( stdlib_lsame( wants, 'S' ) ) then
              ilschur = .true.
              iwants = 2_${ik}$
           else
              iwants = 0_${ik}$
           end if
           if( stdlib_lsame( wantq, 'N' ) ) then
              ilq = .false.
              iwantq = 1_${ik}$
           else if( stdlib_lsame( wantq, 'V' ) ) then
              ilq = .true.
              iwantq = 2_${ik}$
           else if( stdlib_lsame( wantq, 'I' ) ) then
              ilq = .true.
              iwantq = 3_${ik}$
           else
              iwantq = 0_${ik}$
           end if
           if( stdlib_lsame( wantz, 'N' ) ) then
              ilz = .false.
              iwantz = 1_${ik}$
           else if( stdlib_lsame( wantz, 'V' ) ) then
              ilz = .true.
              iwantz = 2_${ik}$
           else if( stdlib_lsame( wantz, 'I' ) ) then
              ilz = .true.
              iwantz = 3_${ik}$
           else
              iwantz = 0_${ik}$
           end if
           ! check argument values
           info = 0_${ik}$
           if( iwants==0_${ik}$ ) then
              info = -1_${ik}$
           else if( iwantq==0_${ik}$ ) then
              info = -2_${ik}$
           else if( iwantz==0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( ilo<1_${ik}$ ) then
              info = -5_${ik}$
           else if( ihi>n .or. ihi<ilo-1 ) then
              info = -6_${ik}$
           else if( lda<n ) then
              info = -8_${ik}$
           else if( ldb<n ) then
              info = -10_${ik}$
           else if( ldq<1_${ik}$ .or. ( ilq .and. ldq<n ) ) then
              info = -15_${ik}$
           else if( ldz<1_${ik}$ .or. ( ilz .and. ldz<n ) ) then
              info = -17_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ0', -info )
              return
           end if
           ! quick return if possible
           if( n<=0_${ik}$ ) then
              work( 1_${ik}$ ) = real( 1_${ik}$,KIND=${ck}$)
              return
           end if
           ! get the parameters
           jbcmpz( 1_${ik}$:1_${ik}$ ) = wants
           jbcmpz( 2_${ik}$:2_${ik}$ ) = wantq
           jbcmpz( 3_${ik}$:3_${ik}$ ) = wantz
           nmin = stdlib${ii}$_ilaenv( 12_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = stdlib${ii}$_ilaenv( 13_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nwr = max( 2_${ik}$, nwr )
           nwr = min( ihi-ilo+1, ( n-1 ) / 3_${ik}$, nwr )
           nibble = stdlib${ii}$_ilaenv( 14_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = stdlib${ii}$_ilaenv( 15_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           nsr = min( nsr, ( n+6 ) / 9_${ik}$, ihi-ilo )
           nsr = max( 2_${ik}$, nsr-mod( nsr, 2_${ik}$ ) )
           rcost = stdlib${ii}$_ilaenv( 17_${ik}$, 'ZLAQZ0', jbcmpz, n, ilo, ihi, lwork )
           itemp1 = int( nsr/sqrt( 1_${ik}$+2*nsr/( real( rcost,KIND=${ck}$)/100_${ik}$*n ) ),KIND=${ik}$)
           itemp1 = ( ( itemp1-1 )/4_${ik}$ )*4_${ik}$+4
           nbr = nsr+itemp1
           if( n < nmin .or. rec >= 2_${ik}$ ) then
              call stdlib${ii}$_${ci}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q,&
                         ldq, z, ldz, work, lwork, rwork,info )
              return
           end if
           ! find out required workspace
           ! workspace query to stdlib${ii}$_${ci}$laqz2
           nw = max( nwr, nmin )
           call stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, ilo, ihi, nw, a, lda, b, ldb,q, ldq, z, ldz, &
           n_undeflated, n_qeflated, alpha,beta, work, nw, work, nw, work, -1_${ik}$, rwork, rec,&
                     aed_info )
           itemp1 = int( work( 1_${ik}$ ),KIND=${ik}$)
           ! workspace query to stdlib${ii}$_${ci}$laqz3
           call stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nsr, nbr, alpha,beta, a, lda, b, &
                     ldb, q, ldq, z, ldz, work, nbr,work, nbr, work, -1_${ik}$, sweep_info )
           itemp2 = int( work( 1_${ik}$ ),KIND=${ik}$)
           lworkreq = max( itemp1+2*nw**2_${ik}$, itemp2+2*nbr**2_${ik}$ )
           if ( lwork ==-1_${ik}$ ) then
              work( 1_${ik}$ ) = real( lworkreq,KIND=${ck}$)
              return
           else if ( lwork < lworkreq ) then
              info = -19_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ0', info )
              return
           end if
           ! initialize q and z
           if( iwantq==3_${ik}$ ) call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, q,ldq )
           if( iwantz==3_${ik}$ ) call stdlib${ii}$_${ci}$laset( 'FULL', n, n, czero, cone, z,ldz )
           ! get machine constants
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${ck}$)/ulp )
           istart = ilo
           istop = ihi
           maxit = 30_${ik}$*( ihi-ilo+1 )
           ld = 0_${ik}$
           do iiter = 1, maxit
              if( iiter >= maxit ) then
                 info = istop+1
                 goto 80
              end if
              if ( istart+1 >= istop ) then
                 istop = istart
                 exit
              end if
              ! check deflations at the end
              if ( abs( a( istop, istop-1 ) ) <= max( smlnum,ulp*( abs( a( istop, istop ) )+abs( &
                        a( istop-1,istop-1 ) ) ) ) ) then
                 a( istop, istop-1 ) = czero
                 istop = istop-1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              ! check deflations at the start
              if ( abs( a( istart+1, istart ) ) <= max( smlnum,ulp*( abs( a( istart, istart ) )+&
                        abs( a( istart+1,istart+1 ) ) ) ) ) then
                 a( istart+1, istart ) = czero
                 istart = istart+1
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( istart+1 >= istop ) then
                 exit
              end if
              ! check interior deflations
              istart2 = istart
              do k = istop, istart+1, -1
                 if ( abs( a( k, k-1 ) ) <= max( smlnum, ulp*( abs( a( k,k ) )+abs( a( k-1, k-1 ) &
                           ) ) ) ) then
                    a( k, k-1 ) = czero
                    istart2 = k
                    exit
                 end if
              end do
              ! get range to apply rotations to
              if ( ilschur ) then
                 istartm = 1_${ik}$
                 istopm = n
              else
                 istartm = istart2
                 istopm = istop
              end if
              ! check infinite eigenvalues, this is done without blocking so might
              ! slow down the method when many infinite eigenvalues are present
              k = istop
              do while ( k>=istart2 )
                 tempr = zero
                 if( k < istop ) then
                    tempr = tempr+abs( b( k, k+1 ) )
                 end if
                 if( k > istart2 ) then
                    tempr = tempr+abs( b( k-1, k ) )
                 end if
                 if( abs( b( k, k ) ) < max( smlnum, ulp*tempr ) ) then
                    ! a diagonal element of b is negligable, move it
                    ! to the top and deflate it
                    do k2 = k, istart2+1, -1
                       call stdlib${ii}$_${ci}$lartg( b( k2-1, k2 ), b( k2-1, k2-1 ), c1, s1,temp )
                       b( k2-1, k2 ) = temp
                       b( k2-1, k2-1 ) = czero
                       call stdlib${ii}$_${ci}$rot( k2-2-istartm+1, b( istartm, k2 ), 1_${ik}$,b( istartm, k2-1 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_${ci}$rot( min( k2+1, istop )-istartm+1, a( istartm,k2 ), 1_${ik}$, a( &
                                 istartm, k2-1 ), 1_${ik}$, c1, s1 )
                       if ( ilz ) then
                          call stdlib${ii}$_${ci}$rot( n, z( 1_${ik}$, k2 ), 1_${ik}$, z( 1_${ik}$, k2-1 ), 1_${ik}$, c1,s1 )
                       end if
                       if( k2<istop ) then
                          call stdlib${ii}$_${ci}$lartg( a( k2, k2-1 ), a( k2+1, k2-1 ), c1,s1, temp )
                                    
                          a( k2, k2-1 ) = temp
                          a( k2+1, k2-1 ) = czero
                          call stdlib${ii}$_${ci}$rot( istopm-k2+1, a( k2, k2 ), lda, a( k2+1,k2 ), lda, c1, &
                                    s1 )
                          call stdlib${ii}$_${ci}$rot( istopm-k2+1, b( k2, k2 ), ldb, b( k2+1,k2 ), ldb, c1, &
                                    s1 )
                          if( ilq ) then
                             call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, k2 ), 1_${ik}$, q( 1_${ik}$, k2+1 ), 1_${ik}$,c1, conjg( s1 ) )
                                       
                          end if
                       end if
                    end do
                    if( istart2<istop )then
                       call stdlib${ii}$_${ci}$lartg( a( istart2, istart2 ), a( istart2+1,istart2 ), c1, s1, &
                                 temp )
                       a( istart2, istart2 ) = temp
                       a( istart2+1, istart2 ) = czero
                       call stdlib${ii}$_${ci}$rot( istopm-( istart2+1 )+1_${ik}$, a( istart2,istart2+1 ), lda, a( &
                                 istart2+1,istart2+1 ), lda, c1, s1 )
                       call stdlib${ii}$_${ci}$rot( istopm-( istart2+1 )+1_${ik}$, b( istart2,istart2+1 ), ldb, b( &
                                 istart2+1,istart2+1 ), ldb, c1, s1 )
                       if( ilq ) then
                          call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, istart2 ), 1_${ik}$, q( 1_${ik}$,istart2+1 ), 1_${ik}$, c1, conjg(&
                                     s1 ) )
                       end if
                    end if
                    istart2 = istart2+1
                 end if
                 k = k-1
              end do
              ! istart2 now points to the top of the bottom right
              ! unreduced hessenberg block
              if ( istart2 >= istop ) then
                 istop = istart2-1
                 ld = 0_${ik}$
                 eshift = czero
                 cycle
              end if
              nw = nwr
              nshifts = nsr
              nblock = nbr
              if ( istop-istart2+1 < nmin ) then
                 ! setting nw to the size of the subblock will make aed deflate
                 ! all the eigenvalues. this is slightly more efficient than just
                 ! using qz_small because the off diagonal part gets updated via blas.
                 if ( istop-istart+1 < nmin ) then
                    nw = istop-istart+1
                    istart2 = istart
                 else
                    nw = istop-istart2+1
                 end if
              end if
              ! time for aed
              call stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, istart2, istop, nw, a, lda,b, ldb, q, ldq,&
               z, ldz, n_undeflated, n_qeflated,alpha, beta, work, nw, work( nw**2_${ik}$+1 ), nw,work( &
                         2_${ik}$*nw**2_${ik}$+1 ), lwork-2*nw**2_${ik}$, rwork, rec,aed_info )
              if ( n_qeflated > 0_${ik}$ ) then
                 istop = istop-n_qeflated
                 ld = 0_${ik}$
                 eshift = czero
              end if
              if ( 100_${ik}$*n_qeflated > nibble*( n_qeflated+n_undeflated ) .or.istop-istart2+1 < nmin &
                        ) then
                 ! aed has uncovered many eigenvalues. skip a qz sweep and run
                 ! aed again.
                 cycle
              end if
              ld = ld+1
              ns = min( nshifts, istop-istart2 )
              ns = min( ns, n_undeflated )
              shiftpos = istop-n_qeflated-n_undeflated+1
              if ( mod( ld, 6_${ik}$ ) == 0_${ik}$ ) then
                 ! exceptional shift.  chosen for no particularly good reason.
                 if( ( real( maxit,KIND=${ck}$)*safmin )*abs( a( istop,istop-1 ) )<abs( a( istop-1, &
                           istop-1 ) ) ) then
                    eshift = a( istop, istop-1 )/b( istop-1, istop-1 )
                 else
                    eshift = eshift+cone/( safmin*real( maxit,KIND=${ck}$) )
                 end if
                 alpha( shiftpos ) = cone
                 beta( shiftpos ) = eshift
                 ns = 1_${ik}$
              end if
              ! time for a qz sweep
              call stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, istart2, istop, ns, nblock,alpha( &
              shiftpos ), beta( shiftpos ), a, lda, b,ldb, q, ldq, z, ldz, work, nblock, work( &
                        nblock**2_${ik}$+1 ), nblock, work( 2_${ik}$*nblock**2_${ik}$+1 ),lwork-2*nblock**2_${ik}$, sweep_info )
           end do
           ! call stdlib${ii}$_${ci}$hgeqz to normalize the eigenvalue blocks and set the eigenvalues
           ! if all the eigenvalues have been found, stdlib${ii}$_${ci}$hgeqz will not do any iterations
           ! and only normalize the blocks. in case of a rare convergence failure,
           ! the single shift might perform better.
        80 call stdlib${ii}$_${ci}$hgeqz( wants, wantq, wantz, n, ilo, ihi, a, lda, b, ldb,alpha, beta, q, &
                  ldq, z, ldz, work, lwork, rwork,norm_info )
           info = norm_info
     end subroutine stdlib${ii}$_${ci}$laqz0

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v )
     !! Given a 3-by-3 matrix pencil (A,B), SLAQZ1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1).
     !! It is assumed that either
     !! 1) sr1 = sr2
     !! or
     !! 2) si = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QZ algorithm.
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           integer(${ik}$), intent( in ) :: lda, ldb
           real(sp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1, sr2, si,beta1, beta2
           real(sp), intent( out ) :: v( * )
           ! ================================================================
           ! local scalars
           real(sp) :: w(2_${ik}$), safmin, safmax, scale1, scale2
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           ! calculate first shifted vector
           w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ )
           w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ )
           scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale1 >= safmin .and. scale1 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1
           end if
           ! solve linear system
           w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ )
           w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ )
           scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale2 >= safmin .and. scale2 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2
           end if
           ! apply second shift
           v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           ! account for imaginary part
           v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2
           ! check for overflow
           if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. &
           stdlib${ii}$_sisnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_sisnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_sisnan( v( 3_${ik}$ ) ) ) &
                     then
              v( 1_${ik}$ ) = zero
              v( 2_${ik}$ ) = zero
              v( 3_${ik}$ ) = zero
           end if
     end subroutine stdlib${ii}$_slaqz1

     pure module subroutine stdlib${ii}$_dlaqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v )
     !! Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1).
     !! It is assumed that either
     !! 1) sr1 = sr2
     !! or
     !! 2) si = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QZ algorithm.
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           integer(${ik}$), intent( in ) :: lda, ldb
           real(dp), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2
           real(dp), intent( out ) :: v( * )
           ! ================================================================
           ! local scalars
           real(dp) :: w(2_${ik}$), safmin, safmax, scale1, scale2
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           ! calculate first shifted vector
           w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ )
           w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ )
           scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale1 >= safmin .and. scale1 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1
           end if
           ! solve linear system
           w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ )
           w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ )
           scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale2 >= safmin .and. scale2 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2
           end if
           ! apply second shift
           v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           ! account for imaginary part
           v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2
           ! check for overflow
           if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. &
           stdlib${ii}$_disnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_disnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_disnan( v( 3_${ik}$ ) ) ) &
                     then
              v( 1_${ik}$ ) = zero
              v( 2_${ik}$ ) = zero
              v( 3_${ik}$ ) = zero
           end if
     end subroutine stdlib${ii}$_dlaqz1

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqz1( a, lda, b, ldb, sr1, sr2, si, beta1, beta2,v )
     !! Given a 3-by-3 matrix pencil (A,B), DLAQZ1: sets v to a
     !! scalar multiple of the first column of the product
     !! (*)  K = (A - (beta2*sr2 - i*si)*B)*B^(-1)*(beta1*A - (sr2 + i*si2)*B)*B^(-1).
     !! It is assumed that either
     !! 1) sr1 = sr2
     !! or
     !! 2) si = 0.
     !! This is useful for starting double implicit shift bulges
     !! in the QZ algorithm.
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           integer(${ik}$), intent( in ) :: lda, ldb
           real(${rk}$), intent( in ) :: a( lda, * ), b( ldb, * ), sr1,sr2, si, beta1, beta2
           real(${rk}$), intent( out ) :: v( * )
           ! ================================================================
           ! local scalars
           real(${rk}$) :: w(2_${ik}$), safmin, safmax, scale1, scale2
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           ! calculate first shifted vector
           w( 1_${ik}$ ) = beta1*a( 1_${ik}$, 1_${ik}$ )-sr1*b( 1_${ik}$, 1_${ik}$ )
           w( 2_${ik}$ ) = beta1*a( 2_${ik}$, 1_${ik}$ )-sr1*b( 2_${ik}$, 1_${ik}$ )
           scale1 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale1 >= safmin .and. scale1 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale1
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale1
           end if
           ! solve linear system
           w( 2_${ik}$ ) = w( 2_${ik}$ )/b( 2_${ik}$, 2_${ik}$ )
           w( 1_${ik}$ ) = ( w( 1_${ik}$ )-b( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )/b( 1_${ik}$, 1_${ik}$ )
           scale2 = sqrt( abs( w( 1_${ik}$ ) ) ) * sqrt( abs( w( 2_${ik}$ ) ) )
           if( scale2 >= safmin .and. scale2 <= safmax ) then
              w( 1_${ik}$ ) = w( 1_${ik}$ )/scale2
              w( 2_${ik}$ ) = w( 2_${ik}$ )/scale2
           end if
           ! apply second shift
           v( 1_${ik}$ ) = beta2*( a( 1_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 1_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 1_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 1_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 2_${ik}$ ) = beta2*( a( 2_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 2_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 2_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 2_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           v( 3_${ik}$ ) = beta2*( a( 3_${ik}$, 1_${ik}$ )*w( 1_${ik}$ )+a( 3_${ik}$, 2_${ik}$ )*w( 2_${ik}$ ) )-sr2*( b( 3_${ik}$,1_${ik}$ )*w( 1_${ik}$ )+b( 3_${ik}$, 2_${ik}$ )*w(&
                      2_${ik}$ ) )
           ! account for imaginary part
           v( 1_${ik}$ ) = v( 1_${ik}$ )+si*si*b( 1_${ik}$, 1_${ik}$ )/scale1/scale2
           ! check for overflow
           if( abs( v( 1_${ik}$ ) )>safmax .or. abs( v( 2_${ik}$ ) ) > safmax .or.abs( v( 3_${ik}$ ) )>safmax .or. &
           stdlib${ii}$_${ri}$isnan( v( 1_${ik}$ ) ) .or.stdlib${ii}$_${ri}$isnan( v( 2_${ik}$ ) ) .or. stdlib${ii}$_${ri}$isnan( v( 3_${ik}$ ) ) ) &
                     then
              v( 1_${ik}$ ) = zero
              v( 2_${ik}$ ) = zero
              v( 3_${ik}$ ) = zero
           end if
     end subroutine stdlib${ii}$_${ri}$laqz1

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! CLAQZ1 chases a 1x1 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           
           ! ================================================================
           ! local variables
           real(sp) :: c
           complex(sp) :: s, temp
           if( k+1 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              call stdlib${ii}$_clartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = czero
              call stdlib${ii}$_crot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_crot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_crot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s )
                           
              end if
           else
              ! normal operation, move bulge down
              ! apply transformation from the right
              call stdlib${ii}$_clartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp )
              b( k+1, k+1 ) = temp
              b( k+1, k ) = czero
              call stdlib${ii}$_crot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_crot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_crot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s )
                           
              end if
              ! apply transformation from the left
              call stdlib${ii}$_clartg( a( k+1, k ), a( k+2, k ), c, s, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = czero
              call stdlib${ii}$_crot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s )
              call stdlib${ii}$_crot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s )
              if ( ilq ) then
                 call stdlib${ii}$_crot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(&
                            s ) )
              end if
           end if
     end subroutine stdlib${ii}$_claqz1

     pure module subroutine stdlib${ii}$_zlaqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! ZLAQZ1 chases a 1x1 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           
           ! ================================================================
           ! local variables
           real(dp) :: c
           complex(dp) :: s, temp
           if( k+1 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              call stdlib${ii}$_zlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = czero
              call stdlib${ii}$_zrot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_zrot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_zrot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s )
                           
              end if
           else
              ! normal operation, move bulge down
              ! apply transformation from the right
              call stdlib${ii}$_zlartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp )
              b( k+1, k+1 ) = temp
              b( k+1, k ) = czero
              call stdlib${ii}$_zrot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_zrot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_zrot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s )
                           
              end if
              ! apply transformation from the left
              call stdlib${ii}$_zlartg( a( k+1, k ), a( k+2, k ), c, s, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = czero
              call stdlib${ii}$_zrot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s )
              call stdlib${ii}$_zrot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s )
              if ( ilq ) then
                 call stdlib${ii}$_zrot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(&
                            s ) )
              end if
           end if
     end subroutine stdlib${ii}$_zlaqz1

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqz1( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! ZLAQZ1: chases a 1x1 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           ! ================================================================
           ! local variables
           real(${ck}$) :: c
           complex(${ck}$) :: s, temp
           if( k+1 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              call stdlib${ii}$_${ci}$lartg( b( ihi, ihi ), b( ihi, ihi-1 ), c, s, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = czero
              call stdlib${ii}$_${ci}$rot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_${ci}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_${ci}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c, s )
                           
              end if
           else
              ! normal operation, move bulge down
              ! apply transformation from the right
              call stdlib${ii}$_${ci}$lartg( b( k+1, k+1 ), b( k+1, k ), c, s, temp )
              b( k+1, k+1 ) = temp
              b( k+1, k ) = czero
              call stdlib${ii}$_${ci}$rot( k+2-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c, s )
                        
              call stdlib${ii}$_${ci}$rot( k-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm, k ),1_${ik}$, c, s )
                        
              if ( ilz ) then
                 call stdlib${ii}$_${ci}$rot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c, s )
                           
              end if
              ! apply transformation from the left
              call stdlib${ii}$_${ci}$lartg( a( k+1, k ), a( k+2, k ), c, s, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = czero
              call stdlib${ii}$_${ci}$rot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda, c,s )
              call stdlib${ii}$_${ci}$rot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb, c,s )
              if ( ilq ) then
                 call stdlib${ii}$_${ci}$rot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c, conjg(&
                            s ) )
              end if
           end if
     end subroutine stdlib${ii}$_${ci}$laqz1

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! SLAQZ2 chases a 2x2 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           ! ================================================================
           ! local variables
           real(sp) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp
           if( k+2 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              h = b( ihi-1:ihi, ihi-2:ihi )
              ! make h upper triangular
              call stdlib${ii}$_slartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_srot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              call stdlib${ii}$_slartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_srot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_slartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_srot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_srot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              b( ihi-1, ihi-2 ) = zero
              b( ihi, ihi-2 ) = zero
              call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              if ( ilz ) then
                 call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
                 call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, &
                           s2 )
              end if
              call stdlib${ii}$_slartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp )
              a( ihi-1, ihi-2 ) = temp
              a( ihi, ihi-2 ) = zero
              call stdlib${ii}$_srot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 &
                        )
              call stdlib${ii}$_srot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 &
                        )
              if ( ilq ) then
                 call stdlib${ii}$_srot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
              call stdlib${ii}$_slartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = zero
              call stdlib${ii}$_srot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_srot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              if ( ilz ) then
                 call stdlib${ii}$_srot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
           else
              ! normal operation, move bulge down
              h = b( k+1:k+2, k:k+2 )
              ! make h upper triangular
              call stdlib${ii}$_slartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_srot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              ! calculate z1 and z2
              call stdlib${ii}$_slartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_srot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_slartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              ! apply transformations from the right
              call stdlib${ii}$_srot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_srot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 )
                        
              call stdlib${ii}$_srot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_srot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 )
                        
              if ( ilz ) then
                 call stdlib${ii}$_srot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_srot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 )
                           
              end if
              b( k+1, k ) = zero
              b( k+2, k ) = zero
              ! calculate q1 and q2
              call stdlib${ii}$_slartg( a( k+2, k ), a( k+3, k ), c1, s1, temp )
              a( k+2, k ) = temp
              a( k+3, k ) = zero
              call stdlib${ii}$_slartg( a( k+1, k ), a( k+2, k ), c2, s2, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = zero
           ! apply transformations from the left
              call stdlib${ii}$_srot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 )
              call stdlib${ii}$_srot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 )
              call stdlib${ii}$_srot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 )
              call stdlib${ii}$_srot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 )
              if ( ilq ) then
                 call stdlib${ii}$_srot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_srot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 )
                           
              end if
           end if
     end subroutine stdlib${ii}$_slaqz2

     pure module subroutine stdlib${ii}$_dlaqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! DLAQZ2 chases a 2x2 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           ! ================================================================
           ! local variables
           real(dp) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp
           if( k+2 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              h = b( ihi-1:ihi, ihi-2:ihi )
              ! make h upper triangular
              call stdlib${ii}$_dlartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_drot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              call stdlib${ii}$_dlartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_drot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_dlartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_drot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_drot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              b( ihi-1, ihi-2 ) = zero
              b( ihi, ihi-2 ) = zero
              call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              if ( ilz ) then
                 call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
                 call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, &
                           s2 )
              end if
              call stdlib${ii}$_dlartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp )
              a( ihi-1, ihi-2 ) = temp
              a( ihi, ihi-2 ) = zero
              call stdlib${ii}$_drot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 &
                        )
              call stdlib${ii}$_drot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 &
                        )
              if ( ilq ) then
                 call stdlib${ii}$_drot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
              call stdlib${ii}$_dlartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = zero
              call stdlib${ii}$_drot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_drot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              if ( ilz ) then
                 call stdlib${ii}$_drot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
           else
              ! normal operation, move bulge down
              h = b( k+1:k+2, k:k+2 )
              ! make h upper triangular
              call stdlib${ii}$_dlartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_drot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              ! calculate z1 and z2
              call stdlib${ii}$_dlartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_drot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_dlartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              ! apply transformations from the right
              call stdlib${ii}$_drot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_drot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 )
                        
              call stdlib${ii}$_drot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_drot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 )
                        
              if ( ilz ) then
                 call stdlib${ii}$_drot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_drot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 )
                           
              end if
              b( k+1, k ) = zero
              b( k+2, k ) = zero
              ! calculate q1 and q2
              call stdlib${ii}$_dlartg( a( k+2, k ), a( k+3, k ), c1, s1, temp )
              a( k+2, k ) = temp
              a( k+3, k ) = zero
              call stdlib${ii}$_dlartg( a( k+1, k ), a( k+2, k ), c2, s2, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = zero
              ! apply transformations from the left
              call stdlib${ii}$_drot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 )
              call stdlib${ii}$_drot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 )
              call stdlib${ii}$_drot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 )
              call stdlib${ii}$_drot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 )
              if ( ilq ) then
                 call stdlib${ii}$_drot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_drot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 )
                           
              end if
           end if
     end subroutine stdlib${ii}$_dlaqz2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqz2( ilq, ilz, k, istartm, istopm, ihi, a, lda, b,ldb, nq, qstart, &
     !! DLAQZ2: chases a 2x2 shift bulge in a matrix pencil down a single position
               q, ldq, nz, zstart, z, ldz )
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilq, ilz
           integer(${ik}$), intent( in ) :: k, lda, ldb, ldq, ldz, istartm, istopm,nq, nz, qstart, &
                     zstart, ihi
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           ! ================================================================
           ! local variables
           real(${rk}$) :: h(2_${ik}$,3_${ik}$), c1, s1, c2, s2, temp
           if( k+2 == ihi ) then
              ! shift is located on the edge of the matrix, remove it
              h = b( ihi-1:ihi, ihi-2:ihi )
              ! make h upper triangular
              call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              call stdlib${ii}$_${ri}$lartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_${ri}$rot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_${ri}$rot( ihi-istartm+1, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_${ri}$rot( ihi-istartm+1, b( istartm, ihi-1 ), 1_${ik}$, b( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              b( ihi-1, ihi-2 ) = zero
              b( ihi, ihi-2 ) = zero
              call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi-1 ), 1_${ik}$, a( istartm,ihi-2 ), 1_${ik}$, c2, &
                        s2 )
              if ( ilz ) then
                 call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
                 call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, z( 1_${ik}$,ihi-2-zstart+1 ), 1_${ik}$, c2, &
                           s2 )
              end if
              call stdlib${ii}$_${ri}$lartg( a( ihi-1, ihi-2 ), a( ihi, ihi-2 ), c1, s1,temp )
              a( ihi-1, ihi-2 ) = temp
              a( ihi, ihi-2 ) = zero
              call stdlib${ii}$_${ri}$rot( istopm-ihi+2, a( ihi-1, ihi-1 ), lda, a( ihi,ihi-1 ), lda, c1, s1 &
                        )
              call stdlib${ii}$_${ri}$rot( istopm-ihi+2, b( ihi-1, ihi-1 ), ldb, b( ihi,ihi-1 ), ldb, c1, s1 &
                        )
              if ( ilq ) then
                 call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, ihi-1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, ihi-qstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
              call stdlib${ii}$_${ri}$lartg( b( ihi, ihi ), b( ihi, ihi-1 ), c1, s1, temp )
              b( ihi, ihi ) = temp
              b( ihi, ihi-1 ) = zero
              call stdlib${ii}$_${ri}$rot( ihi-istartm, b( istartm, ihi ), 1_${ik}$, b( istartm,ihi-1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_${ri}$rot( ihi-istartm+1, a( istartm, ihi ), 1_${ik}$, a( istartm,ihi-1 ), 1_${ik}$, c1, &
                        s1 )
              if ( ilz ) then
                 call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, ihi-zstart+1 ), 1_${ik}$, z( 1_${ik}$, ihi-1-zstart+1 ), 1_${ik}$, c1, s1 &
                           )
              end if
           else
              ! normal operation, move bulge down
              h = b( k+1:k+2, k:k+2 )
              ! make h upper triangular
              call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 1_${ik}$ ), h( 2_${ik}$, 1_${ik}$ ), c1, s1, temp )
              h( 2_${ik}$, 1_${ik}$ ) = zero
              h( 1_${ik}$, 1_${ik}$ ) = temp
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 2_${ik}$, h( 2_${ik}$, 2_${ik}$ ), 2_${ik}$, c1, s1 )
              ! calculate z1 and z2
              call stdlib${ii}$_${ri}$lartg( h( 2_${ik}$, 3_${ik}$ ), h( 2_${ik}$, 2_${ik}$ ), c1, s1, temp )
              call stdlib${ii}$_${ri}$rot( 1_${ik}$, h( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, h( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_${ri}$lartg( h( 1_${ik}$, 2_${ik}$ ), h( 1_${ik}$, 1_${ik}$ ), c2, s2, temp )
              ! apply transformations from the right
              call stdlib${ii}$_${ri}$rot( k+3-istartm+1, a( istartm, k+2 ), 1_${ik}$, a( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_${ri}$rot( k+3-istartm+1, a( istartm, k+1 ), 1_${ik}$, a( istartm,k ), 1_${ik}$, c2, s2 )
                        
              call stdlib${ii}$_${ri}$rot( k+2-istartm+1, b( istartm, k+2 ), 1_${ik}$, b( istartm,k+1 ), 1_${ik}$, c1, s1 )
                        
              call stdlib${ii}$_${ri}$rot( k+2-istartm+1, b( istartm, k+1 ), 1_${ik}$, b( istartm,k ), 1_${ik}$, c2, s2 )
                        
              if ( ilz ) then
                 call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, k+2-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_${ri}$rot( nz, z( 1_${ik}$, k+1-zstart+1 ), 1_${ik}$, z( 1_${ik}$, k-zstart+1 ),1_${ik}$, c2, s2 )
                           
              end if
              b( k+1, k ) = zero
              b( k+2, k ) = zero
              ! calculate q1 and q2
              call stdlib${ii}$_${ri}$lartg( a( k+2, k ), a( k+3, k ), c1, s1, temp )
              a( k+2, k ) = temp
              a( k+3, k ) = zero
              call stdlib${ii}$_${ri}$lartg( a( k+1, k ), a( k+2, k ), c2, s2, temp )
              a( k+1, k ) = temp
              a( k+2, k ) = zero
              ! apply transformations from the left
              call stdlib${ii}$_${ri}$rot( istopm-k, a( k+2, k+1 ), lda, a( k+3, k+1 ), lda,c1, s1 )
              call stdlib${ii}$_${ri}$rot( istopm-k, a( k+1, k+1 ), lda, a( k+2, k+1 ), lda,c2, s2 )
              call stdlib${ii}$_${ri}$rot( istopm-k, b( k+2, k+1 ), ldb, b( k+3, k+1 ), ldb,c1, s1 )
              call stdlib${ii}$_${ri}$rot( istopm-k, b( k+1, k+1 ), ldb, b( k+2, k+1 ), ldb,c2, s2 )
              if ( ilq ) then
                 call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+3-qstart+1 ), 1_${ik}$, c1, s1 )
                           
                 call stdlib${ii}$_${ri}$rot( nq, q( 1_${ik}$, k+1-qstart+1 ), 1_${ik}$, q( 1_${ik}$, k+2-qstart+1 ), 1_${ik}$, c2, s2 )
                           
              end if
           end if
     end subroutine stdlib${ii}$_${ri}$laqz2

#:endif
#:endfor

     recursive module subroutine stdlib${ii}$_claqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! CLAQZ2 performs AED
               ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), &
                     alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           complex(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           complex(sp), intent(out) :: work(*)
           real(sp), intent(out) :: rwork(*)
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ctgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(sp) :: smlnum, ulp, safmin, safmax, c1, tempr
           complex(sp) :: s, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = czero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_claqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAQZ2', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alpha( kwtop ) = a( kwtop, kwtop )
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = czero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_clacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_clacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_claset( 'FULL', jw, jw, czero, cone, qc, ldqc )
           call stdlib${ii}$_claset( 'FULL', jw, jw, czero, cone, zc, ldzc )
           call stdlib${ii}$_claqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_clacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_clacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == czero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                    ! try to deflate eigenvalue
                    tempr = abs( a( kwbot, kwbot ) )
                    if( tempr == zero ) then
                       tempr = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_ctgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                                 kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ctgexc_info )
                       k2 = k2+1
                    end if
                    k = k+1
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              k = k+1
           end do
           if ( kwtop /= ilo .and. s /= czero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_clartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = czero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_crot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_crot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_crot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( &
                           s1 ) )
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 ! move bulge down and remove it
                 do k2 = k, kwbot-1
                    call stdlib${ii}$_claqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, &
                              ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc )
                 end do
                 k = k-1
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, czero, work, jw )
              call stdlib${ii}$_clacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_cgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, czero, work, jw )
              call stdlib${ii}$_clacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_cgemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, &
                        work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )&
                       , lda )
              call stdlib${ii}$_cgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_clacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )&
                       , ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_cgemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, &
                        work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_claqz2

     recursive module subroutine stdlib${ii}$_zlaqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! ZLAQZ2 performs AED
               ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), &
                     alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           complex(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           complex(dp), intent(out) :: work(*)
           real(dp), intent(out) :: rwork(*)
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(dp) ::smlnum, ulp, safmin, safmax, c1, tempr
           complex(dp) :: s, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = czero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_zlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ2', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alpha( kwtop ) = a( kwtop, kwtop )
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = czero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_zlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_zlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_zlaset( 'FULL', jw, jw, czero, cone, qc, ldqc )
           call stdlib${ii}$_zlaset( 'FULL', jw, jw, czero, cone, zc, ldzc )
           call stdlib${ii}$_zlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_zlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_zlacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == czero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                    ! try to deflate eigenvalue
                    tempr = abs( a( kwbot, kwbot ) )
                    if( tempr == zero ) then
                       tempr = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_ztgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                                 kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info )
                       k2 = k2+1
                    end if
                    k = k+1
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              k = k+1
           end do
           if ( kwtop /= ilo .and. s /= czero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_zlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = czero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_zrot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_zrot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_zrot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( &
                           s1 ) )
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 ! move bulge down and remove it
                 do k2 = k, kwbot-1
                    call stdlib${ii}$_zlaqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, &
                              ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc )
                 end do
                 k = k-1
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, czero, work, jw )
              call stdlib${ii}$_zlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_zgemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, czero, work, jw )
              call stdlib${ii}$_zlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, &
                        work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )&
                       , lda )
              call stdlib${ii}$_zgemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_zlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )&
                       , ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, &
                        work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_zlaqz2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     recursive module subroutine stdlib${ii}$_${ci}$laqz2( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! ZLAQZ2: performs AED
               ldq, z, ldz, ns,nd, alpha, beta, qc, ldqc, zc, ldzc,work, lwork, rwork, rec, info )
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), &
                     alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           complex(${ck}$), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           complex(${ck}$), intent(out) :: work(*)
           real(${ck}$), intent(out) :: rwork(*)
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, ztgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(${ck}$) ::smlnum, ulp, safmin, safmax, c1, tempr
           complex(${ck}$) :: s, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = czero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_${ci}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alpha, beta, qc, ldqc, zc,ldzc, work, -1_${ik}$, rwork, rec+1, qz_small_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ2', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${ck}$)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alpha( kwtop ) = a( kwtop, kwtop )
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = czero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_${ci}$laset( 'FULL', jw, jw, czero, cone, qc, ldqc )
           call stdlib${ii}$_${ci}$laset( 'FULL', jw, jw, czero, cone, zc, ldzc )
           call stdlib${ii}$_${ci}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alpha, beta, qc, ldqc, zc,ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$, rwork,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == czero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                    ! try to deflate eigenvalue
                    tempr = abs( a( kwbot, kwbot ) )
                    if( tempr == zero ) then
                       tempr = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*tempr, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_${ci}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                                 kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, ztgexc_info )
                       k2 = k2+1
                    end if
                    k = k+1
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              k = k+1
           end do
           if ( kwtop /= ilo .and. s /= czero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 ) *conjg( qc( 1_${ik}$,1_${ik}$:jw-nd ) )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_${ci}$lartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = czero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_${ci}$rot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_${ci}$rot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_${ci}$rot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, conjg( &
                           s1 ) )
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 ! move bulge down and remove it
                 do k2 = k, kwbot-1
                    call stdlib${ii}$_${ci}$laqz1( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b, &
                              ldb, jw, kwtop, qc, ldqc,jw, kwtop, zc, ldzc )
                 end do
                 k = k-1
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, czero, work, jw )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', jw, istopm-ihi, jw, cone, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, czero, work, jw )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, jw, jw, cone, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, czero, &
                        work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', kwtop-istartm, jw, jw, cone, a( istartm,kwtop ), lda, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_${ci}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop )&
                       , lda )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', kwtop-istartm, jw, jw, cone, b( istartm,kwtop ), ldb, &
                        zc, ldzc, czero, work,kwtop-istartm )
             call stdlib${ii}$_${ci}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop )&
                       , ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, jw, jw, cone, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, czero, &
                        work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_${ci}$laqz2

#:endif
#:endfor



     recursive module subroutine stdlib${ii}$_slaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! SLAQZ3 performs AED
               ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), alphar(&
                      * ), alphai( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           real(sp), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           real(sp), intent(out) :: work(*)
           
           ! ================================================================
           ! local scalars
           logical(lk) :: bulge
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, stgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(sp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = zero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_stgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, &
                     work, -1_${ik}$, stgexc_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)
           call stdlib${ii}$_slaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info )
           lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ )
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAQZ3', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           ulp = stdlib${ii}$_slamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=sp)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alphar( kwtop ) = a( kwtop, kwtop )
              alphai( kwtop ) = zero
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = zero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_slacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_slacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_slaset( 'FULL', jw, jw, zero, one, qc, ldqc )
           call stdlib${ii}$_slaset( 'FULL', jw, jw, zero, one, zc, ldzc )
           call stdlib${ii}$_slaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_slacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_slacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == zero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                 bulge = .false.
                 if ( kwbot-kwtop+1 >= 2_${ik}$ ) then
                    bulge = a( kwbot, kwbot-1 ) /= zero
                 end if
                 if ( bulge ) then
                    ! try to deflate complex conjugate eigenvalue pair
                    temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( &
                              a( kwbot-1, kwbot ) ) )
                    if( temp == zero )then
                       temp = abs( s )
                    end if
                    if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= &
                              max( smlnum,ulp*temp ) ) then
                       ! deflatable
                       kwbot = kwbot-2
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info )
                                 
                       k2 = k2+2
                    end if
                    k = k+2
                 else
                    ! try to deflate real eigenvalue
                    temp = abs( a( kwbot, kwbot ) )
                    if( temp == zero ) then
                       temp = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_stgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,stgexc_info )
                                 
                       k2 = k2+1
                    end if
                    k = k+1
                 end if
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              bulge = .false.
              if ( k < ihi ) then
                 if ( a( k+1, k ) /= zero ) then
                    bulge = .true.
                 end if
              end if
              if ( bulge ) then
                 ! 2x2 eigenvalue block
                 call stdlib${ii}$_slag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),&
                            alphar( k ),alphar( k+1 ), alphai( k ) )
                 alphai( k+1 ) = -alphai( k )
                 k = k+2
              else
                 ! 1x1 eigenvalue block
                 alphar( k ) = a( k, k )
                 alphai( k ) = zero
                 beta( k ) = b( k, k )
                 k = k+1
              end if
           end do
           if ( kwtop /= ilo .and. s /= zero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_slartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = zero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_srot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_srot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_srot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 )
                           
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then
                    ! move double pole block down and remove it
                    do k2 = k-1, kwbot-2
                       call stdlib${ii}$_slaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,&
                                  ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc )
                    end do
                    k = k-2
                 else
                    ! k points to single shift
                    do k2 = k, kwbot-2
                       ! move shift down
                       call stdlib${ii}$_slartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp )
                       b( k2+1, k2+1 ) = temp
                       b( k2+1, k2 ) = zero
                       call stdlib${ii}$_srot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_srot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, &
                                 c1, s1 )
                       call stdlib${ii}$_srot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, &
                                 s1 )
                       call stdlib${ii}$_slartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp )
                       a( k2+1, k2 ) = temp
                       a( k2+2, k2 ) = zero
                       call stdlib${ii}$_srot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,&
                                  s1 )
                       call stdlib${ii}$_srot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,&
                                  s1 )
                       call stdlib${ii}$_srot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, &
                                 c1, s1 )
                    end do
                    ! remove the shift
                    call stdlib${ii}$_slartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp )
                              
                    b( kwbot, kwbot ) = temp
                    b( kwbot, kwbot-1 ) = zero
                    call stdlib${ii}$_srot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),&
                               1_${ik}$, c1, s1 )
                    call stdlib${ii}$_srot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 &
                              ), 1_${ik}$, c1, s1 )
                    call stdlib${ii}$_srot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, &
                              c1, s1 )
                    k = k-1
                 end if
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, zero, work, jw )
              call stdlib${ii}$_slacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_sgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, zero, work, jw )
              call stdlib${ii}$_slacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, &
                        work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop &
                        ), lda )
              call stdlib${ii}$_sgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_slacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop &
                        ), ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, &
                        work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_slaqz3

     recursive module subroutine stdlib${ii}$_dlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! DLAQZ3 performs AED
               ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(&
                      * ),alphai( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           real(dp), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           real(dp), intent(out) :: work(*)
           ! ================================================================
           ! local scalars
           logical(lk) :: bulge
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(dp) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = zero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_dtgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, &
                     work, -1_${ik}$, dtgexc_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)
           call stdlib${ii}$_dlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info )
           lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ )
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ3', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           ulp = stdlib${ii}$_dlamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=dp)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alphar( kwtop ) = a( kwtop, kwtop )
              alphai( kwtop ) = zero
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = zero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_dlacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_dlacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_dlaset( 'FULL', jw, jw, zero, one, qc, ldqc )
           call stdlib${ii}$_dlaset( 'FULL', jw, jw, zero, one, zc, ldzc )
           call stdlib${ii}$_dlaqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_dlacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_dlacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == zero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                 bulge = .false.
                 if ( kwbot-kwtop+1 >= 2_${ik}$ ) then
                    bulge = a( kwbot, kwbot-1 ) /= zero
                 end if
                 if ( bulge ) then
                    ! try to deflate complex conjugate eigenvalue pair
                    temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( &
                              a( kwbot-1, kwbot ) ) )
                    if( temp == zero )then
                       temp = abs( s )
                    end if
                    if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= &
                              max( smlnum,ulp*temp ) ) then
                       ! deflatable
                       kwbot = kwbot-2
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info )
                                 
                       k2 = k2+2
                    end if
                    k = k+2
                 else
                    ! try to deflate real eigenvalue
                    temp = abs( a( kwbot, kwbot ) )
                    if( temp == zero ) then
                       temp = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_dtgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info )
                                 
                       k2 = k2+1
                    end if
                    k = k+1
                 end if
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              bulge = .false.
              if ( k < ihi ) then
                 if ( a( k+1, k ) /= zero ) then
                    bulge = .true.
                 end if
              end if
              if ( bulge ) then
                 ! 2x2 eigenvalue block
                 call stdlib${ii}$_dlag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),&
                            alphar( k ),alphar( k+1 ), alphai( k ) )
                 alphai( k+1 ) = -alphai( k )
                 k = k+2
              else
                 ! 1x1 eigenvalue block
                 alphar( k ) = a( k, k )
                 alphai( k ) = zero
                 beta( k ) = b( k, k )
                 k = k+1
              end if
           end do
           if ( kwtop /= ilo .and. s /= zero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_dlartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = zero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_drot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_drot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_drot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 )
                           
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then
                    ! move double pole block down and remove it
                    do k2 = k-1, kwbot-2
                       call stdlib${ii}$_dlaqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,&
                                  ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc )
                    end do
                    k = k-2
                 else
                    ! k points to single shift
                    do k2 = k, kwbot-2
                       ! move shift down
                       call stdlib${ii}$_dlartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp )
                       b( k2+1, k2+1 ) = temp
                       b( k2+1, k2 ) = zero
                       call stdlib${ii}$_drot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_drot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, &
                                 c1, s1 )
                       call stdlib${ii}$_drot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, &
                                 s1 )
                       call stdlib${ii}$_dlartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp )
                       a( k2+1, k2 ) = temp
                       a( k2+2, k2 ) = zero
                       call stdlib${ii}$_drot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,&
                                  s1 )
                       call stdlib${ii}$_drot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,&
                                  s1 )
                       call stdlib${ii}$_drot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, &
                                 c1, s1 )
                    end do
                    ! remove the shift
                    call stdlib${ii}$_dlartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp )
                              
                    b( kwbot, kwbot ) = temp
                    b( kwbot, kwbot-1 ) = zero
                    call stdlib${ii}$_drot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),&
                               1_${ik}$, c1, s1 )
                    call stdlib${ii}$_drot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 &
                              ), 1_${ik}$, c1, s1 )
                    call stdlib${ii}$_drot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, &
                              c1, s1 )
                    k = k-1
                 end if
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, zero, work, jw )
              call stdlib${ii}$_dlacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_dgemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, zero, work, jw )
              call stdlib${ii}$_dlacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, &
                        work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop &
                        ), lda )
              call stdlib${ii}$_dgemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_dlacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop &
                        ), ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, &
                        work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_dlaqz3

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     recursive module subroutine stdlib${ii}$_${ri}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nw,a, lda, b, ldb, q, &
     !! DLAQZ3: performs AED
               ldq, z, ldz, ns,nd, alphar, alphai, beta, qc, ldqc,zc, ldzc, work, lwork, rec, info )
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, nw, lda, ldb, ldq, ldz,ldqc, ldzc, lwork, &
                     rec
           real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), alphar(&
                      * ),alphai( * ), beta( * )
           integer(${ik}$), intent( out ) :: ns, nd, info
           real(${rk}$), intent(inout) :: qc(ldqc,*), zc(ldzc,*)
           real(${rk}$), intent(out) :: work(*)
           ! ================================================================
           ! local scalars
           logical(lk) :: bulge
           integer(${ik}$) :: jw, kwtop, kwbot, istopm, istartm, k, k2, dtgexc_info, ifst, ilst, &
                     lworkreq, qz_small_info
           real(${rk}$) :: s, smlnum, ulp, safmin, safmax, c1, s1, temp
           info = 0_${ik}$
           ! set up deflation window
           jw = min( nw, ihi-ilo+1 )
           kwtop = ihi-jw+1
           if ( kwtop == ilo ) then
              s = zero
           else
              s = a( kwtop, kwtop-1 )
           end if
           ! determine required workspace
           ifst = 1_${ik}$
           ilst = jw
           call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a, lda, b, ldb, qc, ldqc, zc,ldzc, ifst, ilst, &
                     work, -1_${ik}$, dtgexc_info )
           lworkreq = int( work( 1_${ik}$ ),KIND=${ik}$)
           call stdlib${ii}$_${ri}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
                      ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work, -1_${ik}$, rec+1, qz_small_info )
           lworkreq = max( lworkreq, int( work( 1_${ik}$ ),KIND=${ik}$)+2_${ik}$*jw**2_${ik}$ )
           lworkreq = max( lworkreq, n*nw, 2_${ik}$*nw**2_${ik}$+n )
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = lworkreq
              return
           else if ( lwork < lworkreq ) then
              info = -26_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ3', -info )
              return
           end if
           ! get machine constants
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_${ri}$labad( safmin, safmax )
           ulp = stdlib${ii}$_${ri}$lamch( 'PRECISION' )
           smlnum = safmin*( real( n,KIND=${rk}$)/ulp )
           if ( ihi == kwtop ) then
              ! 1 by 1 deflation window, just try a regular deflation
              alphar( kwtop ) = a( kwtop, kwtop )
              alphai( kwtop ) = zero
              beta( kwtop ) = b( kwtop, kwtop )
              ns = 1_${ik}$
              nd = 0_${ik}$
              if ( abs( s ) <= max( smlnum, ulp*abs( a( kwtop,kwtop ) ) ) ) then
                 ns = 0_${ik}$
                 nd = 1_${ik}$
                 if ( kwtop > ilo ) then
                    a( kwtop, kwtop-1 ) = zero
                 end if
              end if
           end if
           ! store window in case of convergence failure
           call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, a( kwtop, kwtop ), lda, work, jw )
           call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, b( kwtop, kwtop ), ldb, work( jw**2_${ik}$+1 ), jw )
                     
           ! transform window to real schur form
           call stdlib${ii}$_${ri}$laset( 'FULL', jw, jw, zero, one, qc, ldqc )
           call stdlib${ii}$_${ri}$laset( 'FULL', jw, jw, zero, one, zc, ldzc )
           call stdlib${ii}$_${ri}$laqz0( 'S', 'V', 'V', jw, 1_${ik}$, jw, a( kwtop, kwtop ), lda,b( kwtop, kwtop ),&
            ldb, alphar, alphai, beta, qc,ldqc, zc, ldzc, work( 2_${ik}$*jw**2_${ik}$+1 ), lwork-2*jw**2_${ik}$,rec+1, &
                      qz_small_info )
           if( qz_small_info /= 0_${ik}$ ) then
              ! convergence failure, restore the window and exit
              nd = 0_${ik}$
              ns = jw-qz_small_info
              call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, work, jw, a( kwtop, kwtop ), lda )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, jw, work( jw**2_${ik}$+1 ), jw, b( kwtop,kwtop ), ldb )
                        
              return
           end if
           ! deflation detection loop
           if ( kwtop == ilo .or. s == zero ) then
              kwbot = kwtop-1
           else
              kwbot = ihi
              k = 1_${ik}$
              k2 = 1_${ik}$
              do while ( k <= jw )
                 bulge = .false.
                 if ( kwbot-kwtop+1 >= 2_${ik}$ ) then
                    bulge = a( kwbot, kwbot-1 ) /= zero
                 end if
                 if ( bulge ) then
                    ! try to deflate complex conjugate eigenvalue pair
                    temp = abs( a( kwbot, kwbot ) )+sqrt( abs( a( kwbot,kwbot-1 ) ) )*sqrt( abs( &
                              a( kwbot-1, kwbot ) ) )
                    if( temp == zero )then
                       temp = abs( s )
                    end if
                    if ( max( abs( s*qc( 1_${ik}$, kwbot-kwtop ) ), abs( s*qc( 1_${ik}$,kwbot-kwtop+1 ) ) ) <= &
                              max( smlnum,ulp*temp ) ) then
                       ! deflatable
                       kwbot = kwbot-2
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info )
                                 
                       k2 = k2+2
                    end if
                    k = k+2
                 else
                    ! try to deflate real eigenvalue
                    temp = abs( a( kwbot, kwbot ) )
                    if( temp == zero ) then
                       temp = abs( s )
                    end if
                    if ( ( abs( s*qc( 1_${ik}$, kwbot-kwtop+1 ) ) ) <= max( ulp*temp, smlnum ) ) &
                              then
                       ! deflatable
                       kwbot = kwbot-1
                    else
                       ! not deflatable, move out of the way
                       ifst = kwbot-kwtop+1
                       ilst = k2
                       call stdlib${ii}$_${ri}$tgexc( .true., .true., jw, a( kwtop, kwtop ),lda, b( kwtop, &
                       kwtop ), ldb, qc, ldqc,zc, ldzc, ifst, ilst, work, lwork,dtgexc_info )
                                 
                       k2 = k2+1
                    end if
                    k = k+1
                 end if
              end do
           end if
           ! store eigenvalues
           nd = ihi-kwbot
           ns = jw-nd
           k = kwtop
           do while ( k <= ihi )
              bulge = .false.
              if ( k < ihi ) then
                 if ( a( k+1, k ) /= zero ) then
                    bulge = .true.
                 end if
              end if
              if ( bulge ) then
                 ! 2x2 eigenvalue block
                 call stdlib${ii}$_${ri}$lag2( a( k, k ), lda, b( k, k ), ldb, safmin,beta( k ), beta( k+1 ),&
                            alphar( k ),alphar( k+1 ), alphai( k ) )
                 alphai( k+1 ) = -alphai( k )
                 k = k+2
              else
                 ! 1x1 eigenvalue block
                 alphar( k ) = a( k, k )
                 alphai( k ) = zero
                 beta( k ) = b( k, k )
                 k = k+1
              end if
           end do
           if ( kwtop /= ilo .and. s /= zero ) then
              ! reflect spike back, this will create optimally packed bulges
              a( kwtop:kwbot, kwtop-1 ) = a( kwtop, kwtop-1 )*qc( 1_${ik}$,1_${ik}$:jw-nd )
              do k = kwbot-1, kwtop, -1
                 call stdlib${ii}$_${ri}$lartg( a( k, kwtop-1 ), a( k+1, kwtop-1 ), c1, s1,temp )
                 a( k, kwtop-1 ) = temp
                 a( k+1, kwtop-1 ) = zero
                 k2 = max( kwtop, k-1 )
                 call stdlib${ii}$_${ri}$rot( ihi-k2+1, a( k, k2 ), lda, a( k+1, k2 ), lda, c1,s1 )
                 call stdlib${ii}$_${ri}$rot( ihi-( k-1 )+1_${ik}$, b( k, k-1 ), ldb, b( k+1, k-1 ),ldb, c1, s1 )
                           
                 call stdlib${ii}$_${ri}$rot( jw, qc( 1_${ik}$, k-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$, k+1-kwtop+1 ),1_${ik}$, c1, s1 )
                           
              end do
              ! chase bulges down
              istartm = kwtop
              istopm = ihi
              k = kwbot-1
              do while ( k >= kwtop )
                 if ( ( k >= kwtop+1 ) .and. a( k+1, k-1 ) /= zero ) then
                    ! move double pole block down and remove it
                    do k2 = k-1, kwbot-2
                       call stdlib${ii}$_${ri}$laqz2( .true., .true., k2, kwtop, kwtop+jw-1,kwbot, a, lda, b,&
                                  ldb, jw, kwtop, qc,ldqc, jw, kwtop, zc, ldzc )
                    end do
                    k = k-2
                 else
                    ! k points to single shift
                    do k2 = k, kwbot-2
                       ! move shift down
                       call stdlib${ii}$_${ri}$lartg( b( k2+1, k2+1 ), b( k2+1, k2 ), c1, s1,temp )
                       b( k2+1, k2+1 ) = temp
                       b( k2+1, k2 ) = zero
                       call stdlib${ii}$_${ri}$rot( k2+2-istartm+1, a( istartm, k2+1 ), 1_${ik}$,a( istartm, k2 ), &
                                 1_${ik}$, c1, s1 )
                       call stdlib${ii}$_${ri}$rot( k2-istartm+1, b( istartm, k2+1 ), 1_${ik}$,b( istartm, k2 ), 1_${ik}$, &
                                 c1, s1 )
                       call stdlib${ii}$_${ri}$rot( jw, zc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,k2-kwtop+1 ), 1_${ik}$, c1, &
                                 s1 )
                       call stdlib${ii}$_${ri}$lartg( a( k2+1, k2 ), a( k2+2, k2 ), c1, s1,temp )
                       a( k2+1, k2 ) = temp
                       a( k2+2, k2 ) = zero
                       call stdlib${ii}$_${ri}$rot( istopm-k2, a( k2+1, k2+1 ), lda, a( k2+2,k2+1 ), lda, c1,&
                                  s1 )
                       call stdlib${ii}$_${ri}$rot( istopm-k2, b( k2+1, k2+1 ), ldb, b( k2+2,k2+1 ), ldb, c1,&
                                  s1 )
                       call stdlib${ii}$_${ri}$rot( jw, qc( 1_${ik}$, k2+1-kwtop+1 ), 1_${ik}$, qc( 1_${ik}$,k2+2-kwtop+1 ), 1_${ik}$, &
                                 c1, s1 )
                    end do
                    ! remove the shift
                    call stdlib${ii}$_${ri}$lartg( b( kwbot, kwbot ), b( kwbot, kwbot-1 ), c1,s1, temp )
                              
                    b( kwbot, kwbot ) = temp
                    b( kwbot, kwbot-1 ) = zero
                    call stdlib${ii}$_${ri}$rot( kwbot-istartm, b( istartm, kwbot ), 1_${ik}$,b( istartm, kwbot-1 ),&
                               1_${ik}$, c1, s1 )
                    call stdlib${ii}$_${ri}$rot( kwbot-istartm+1, a( istartm, kwbot ), 1_${ik}$,a( istartm, kwbot-1 &
                              ), 1_${ik}$, c1, s1 )
                    call stdlib${ii}$_${ri}$rot( jw, zc( 1_${ik}$, kwbot-kwtop+1 ), 1_${ik}$, zc( 1_${ik}$,kwbot-1-kwtop+1 ), 1_${ik}$, &
                              c1, s1 )
                    k = k-1
                 end if
              end do
           end if
           ! apply qc and zc to rest of the matrix
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           if ( istopm-ihi > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,a( kwtop, ihi+1 ), &
                        lda, zero, work, jw )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, istopm-ihi, work, jw, a( kwtop,ihi+1 ), lda )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', jw, istopm-ihi, jw, one, qc, ldqc,b( kwtop, ihi+1 ), &
                        ldb, zero, work, jw )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', jw, istopm-ihi, work, jw, b( kwtop,ihi+1 ), ldb )
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, jw, jw, one, q( 1_${ik}$, kwtop ), ldq, qc,ldqc, zero, &
                        work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, jw, work, n, q( 1_${ik}$, kwtop ), ldq )
           end if
           if ( kwtop-1-istartm+1 > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', kwtop-istartm, jw, jw, one, a( istartm,kwtop ), lda, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,a( istartm, kwtop &
                        ), lda )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', kwtop-istartm, jw, jw, one, b( istartm,kwtop ), ldb, &
                        zc, ldzc, zero, work,kwtop-istartm )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', kwtop-istartm, jw, work, kwtop-istartm,b( istartm, kwtop &
                        ), ldb )
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, jw, jw, one, z( 1_${ik}$, kwtop ), ldz, zc,ldzc, zero, &
                        work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, jw, work, n, z( 1_${ik}$, kwtop ), ldz )
           end if
     end subroutine stdlib${ii}$_${ri}$laqz3

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_claqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,&
     !! CLAQZ3 Executes a single multishift QZ sweep
                beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_desired, ldqc, ldzc
           complex(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( &
                     ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: info
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(sp) :: safmin, safmax, c, scale
           complex(sp) :: temp, temp2, temp3, s
           info = 0_${ik}$
           if ( nblock_desired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_desired
              return
           else if ( lwork < n*nblock_desired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CLAQZ3', -info )
              return
           end if
           ! executable statements
           ! get machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_slabad( safmin, safmax )
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ns = nshifts
           npos = max( nblock_desired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_claset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc )
           call stdlib${ii}$_claset( 'FULL', ns, ns, czero, cone, zc, ldzc )
           do i = 1, ns
              ! introduce the shift
              scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) )
              if( scale >= safmin .and. scale <= safmax ) then
                 alpha( i ) = alpha( i )/scale
                 beta( i ) = beta( i )/scale
              end if
              temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo )
              temp3 = beta( i )*a( ilo+1, ilo )
              if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then
                 temp2 = cone
                 temp3 = czero
              end if
              call stdlib${ii}$_clartg( temp2, temp3, c, s, temp )
              call stdlib${ii}$_crot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s )
              call stdlib${ii}$_crot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s )
              call stdlib${ii}$_crot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c, conjg( s ) )
              ! chase the shift down
              do j = 1, ns-i
                 call stdlib${ii}$_claqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+&
                        ns ), lda, czero, work, sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+&
                        ns ), ldb, czero, work, sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
             call stdlib${ii}$_cgemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                       czero, work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_cgemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_claset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc )
              call stdlib${ii}$_claset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -1
                 do j = 0, np-1
                    ! move down the block with index k+i+j, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_claqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, czero, work,sheight )
                 call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, czero, work,sheight )
                 call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_cgemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           czero, work, n )
                 call stdlib${ii}$_clacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_cgemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           czero, work, n )
                 call stdlib${ii}$_clacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_claset( 'FULL', ns, ns, czero, cone, qc, ldqc )
           call stdlib${ii}$_claset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i, ihi-1
                 call stdlib${ii}$_claqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, czero, work, sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_cgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, czero, work, sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_cgemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,&
                         work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), &
                        lda, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_cgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), &
                        ldb, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_clacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
              call stdlib${ii}$_cgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_clacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_claqz3

     pure module subroutine stdlib${ii}$_zlaqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, alpha,&
     !! ZLAQZ3 Executes a single multishift QZ sweep
                beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_desired, ldqc, ldzc
           complex(dp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( &
                     ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: info
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(dp) :: safmin, safmax, c, scale
           complex(dp) :: temp, temp2, temp3, s
           info = 0_${ik}$
           if ( nblock_desired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_desired
              return
           else if ( lwork < n*nblock_desired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ3', -info )
              return
           end if
           ! executable statements
           ! get machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_dlabad( safmin, safmax )
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ns = nshifts
           npos = max( nblock_desired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_zlaset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc )
           call stdlib${ii}$_zlaset( 'FULL', ns, ns, czero, cone, zc, ldzc )
           do i = 1, ns
              ! introduce the shift
              scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) )
              if( scale >= safmin .and. scale <= safmax ) then
                 alpha( i ) = alpha( i )/scale
                 beta( i ) = beta( i )/scale
              end if
              temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo )
              temp3 = beta( i )*a( ilo+1, ilo )
              if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then
                 temp2 = cone
                 temp3 = czero
              end if
              call stdlib${ii}$_zlartg( temp2, temp3, c, s, temp )
              call stdlib${ii}$_zrot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s )
              call stdlib${ii}$_zrot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s )
              call stdlib${ii}$_zrot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c,conjg( s ) )
              ! chase the shift down
              do j = 1, ns-i
                 call stdlib${ii}$_zlaqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+&
                        ns ), lda, czero, work, sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+&
                        ns ), ldb, czero, work, sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                        czero, work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_zlaset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc )
              call stdlib${ii}$_zlaset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -1
                 do j = 0, np-1
                    ! move down the block with index k+i+j, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_zlaqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, czero, work,sheight )
                 call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, czero, work,sheight )
                 call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_zgemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           czero, work, n )
                 call stdlib${ii}$_zlacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_zgemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           czero, work, n )
                 call stdlib${ii}$_zlacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_zlaset( 'FULL', ns, ns, czero, cone, qc, ldqc )
           call stdlib${ii}$_zlaset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i, ihi-1
                 call stdlib${ii}$_zlaqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, czero, work, sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_zgemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, czero, work, sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,&
                         work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), &
                        lda, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_zgemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), &
                        ldb, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_zlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
              call stdlib${ii}$_zgemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_zlacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_zlaqz3

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$laqz3( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, alpha,&
     !! ZLAQZ3: Executes a single multishift QZ sweep
                beta, a, lda, b, ldb,q, ldq, z, ldz, qc, ldqc, zc, ldzc, work,lwork, info )
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_qesired, ldqc, ldzc
           complex(${ck}$), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq,* ), z( ldz, * ), qc( &
                     ldqc, * ), zc( ldzc, * ), work( * ),alpha( * ), beta( * )
           integer(${ik}$), intent( out ) :: info
           
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(${ck}$) :: safmin, safmax, c, scale
           complex(${ck}$) :: temp, temp2, temp3, s
           info = 0_${ik}$
           if ( nblock_qesired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_qesired
              return
           else if ( lwork < n*nblock_qesired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZLAQZ3', -info )
              return
           end if
           ! executable statements
           ! get machine constants
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           safmax = one/safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, safmax )
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ns = nshifts
           npos = max( nblock_qesired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_${ci}$laset( 'FULL', ns+1, ns+1, czero, cone, qc, ldqc )
           call stdlib${ii}$_${ci}$laset( 'FULL', ns, ns, czero, cone, zc, ldzc )
           do i = 1, ns
              ! introduce the shift
              scale = sqrt( abs( alpha( i ) ) ) * sqrt( abs( beta( i ) ) )
              if( scale >= safmin .and. scale <= safmax ) then
                 alpha( i ) = alpha( i )/scale
                 beta( i ) = beta( i )/scale
              end if
              temp2 = beta( i )*a( ilo, ilo )-alpha( i )*b( ilo, ilo )
              temp3 = beta( i )*a( ilo+1, ilo )
              if ( abs( temp2 ) > safmax .or.abs( temp3 ) > safmax ) then
                 temp2 = cone
                 temp3 = czero
              end if
              call stdlib${ii}$_${ci}$lartg( temp2, temp3, c, s, temp )
              call stdlib${ii}$_${ci}$rot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c,s )
              call stdlib${ii}$_${ci}$rot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c,s )
              call stdlib${ii}$_${ci}$rot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c,conjg( s ) )
              ! chase the shift down
              do j = 1, ns-i
                 call stdlib${ii}$_${ci}$laqz1( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ilo, ilo+&
                        ns ), lda, czero, work, sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ilo, ilo+&
                        ns ), ldb, czero, work, sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, sheight, sheight, cone, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                        czero, work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ilo ), lda, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ilo ), ldb, &
                        zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, swidth, swidth, cone, z( 1_${ik}$, ilo ),ldz, zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_${ci}$laset( 'FULL', ns+np, ns+np, czero, cone, qc, ldqc )
              call stdlib${ii}$_${ci}$laset( 'FULL', ns+np, ns+np, czero, cone, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -1
                 do j = 0, np-1
                    ! move down the block with index k+i+j, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_${ci}$laqz1( .true., .true., k+i+j, istartb, istopb, ihi,a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, czero, work,sheight )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, czero, work,sheight )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, nblock, nblock, cone, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           czero, work, n )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, k ), lda, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, k ), ldb, &
                           zc, ldzc, czero, work,sheight )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, nblock, nblock, cone, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           czero, work, n )
                 call stdlib${ii}$_${ci}$lacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_${ci}$laset( 'FULL', ns, ns, czero, cone, qc, ldqc )
           call stdlib${ii}$_${ci}$laset( 'FULL', ns+1, ns+1, czero, cone, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i, ihi-1
                 call stdlib${ii}$_${ci}$laqz1( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, czero, work, sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_${ci}$gemm( 'C', 'N', sheight, swidth, sheight, cone, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, czero, work, sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, ns, ns, cone, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, czero,&
                         work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,a( istartm, ihi-ns ), &
                        lda, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', sheight, swidth, swidth, cone,b( istartm, ihi-ns ), &
                        ldb, zc, ldzc, czero, work,sheight )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ci}$gemm( 'N', 'N', n, ns+1, ns+1, cone, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, &
                        czero, work, n )
              call stdlib${ii}$_${ci}$lacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_${ci}$laqz3

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, &
     !! SLAQZ4 Executes a single multishift QZ sweep
               si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info )
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_desired, ldqc, ldzc
           real(sp), intent( inout ) :: a( lda, * ), b( ldb, * ), q( ldq, * ),z( ldz, * ), qc( &
                     ldqc, * ), zc( ldzc, * ), work( * ), sr( * ),si( * ), ss( * )
           integer(${ik}$), intent( out ) :: info
           ! ================================================================
           ! local variables
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(sp) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap
           info = 0_${ik}$
           if ( nblock_desired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_desired
              return
           else if ( lwork < n*nblock_desired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SLAQZ4', -info )
              return
           end if
           ! executable statements
           if ( nshifts < 2_${ik}$ ) then
              return
           end if
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ! shuffle shifts into pairs of real shifts and pairs
           ! of complex conjugate shifts assuming complex
           ! conjugate shifts are already adjacent to one
           ! another
           do i = 1, nshifts-2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
                 swap = ss( i )
                 ss( i ) = ss( i+1 )
                 ss( i+1 ) = ss( i+2 )
                 ss( i+2 ) = swap
              end if
           end do
           ! nshfts is supposed to be even, but if it is odd,
           ! then simply reduce it by one.  the shuffle above
           ! ensures that the dropped shift is real and that
           ! the remaining shifts are paired.
           ns = nshifts-mod( nshifts, 2_${ik}$ )
           npos = max( nblock_desired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_slaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc )
           call stdlib${ii}$_slaset( 'FULL', ns, ns, zero, one, zc, ldzc )
           do i = 1, ns, 2
              ! introduce the shift
              call stdlib${ii}$_slaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( &
                        i ), ss( i ), ss( i+1 ), v )
              temp = v( 2_${ik}$ )
              call stdlib${ii}$_slartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) )
              call stdlib${ii}$_slartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_srot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 )
              call stdlib${ii}$_srot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 )
              call stdlib${ii}$_srot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 )
              call stdlib${ii}$_srot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 )
              call stdlib${ii}$_srot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_srot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 )
              ! chase the shift down
              do j = 1, ns-1-i
                 call stdlib${ii}$_slaqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns &
                        ), lda, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns &
                        ), ldb, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                        zero, work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, &
                        zero, work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_slaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc )
              call stdlib${ii}$_slaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -2
                 do j = 0, np-1
                    ! move down the block with index k+i+j-1, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_slaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, zero, work,sheight )
                 call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, zero, work,sheight )
                 call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           zero, work, n )
                 call stdlib${ii}$_slacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           zero, work, n )
                 call stdlib${ii}$_slacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_slaset( 'FULL', ns, ns, zero, one, qc, ldqc )
           call stdlib${ii}$_slaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns, 2
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i-1, ihi-2
                 call stdlib${ii}$_slaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_sgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_sgemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, &
                        work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_sgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_slacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
           call stdlib${ii}$_sgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz, zc,ldzc, zero, &
                     work, n )
              call stdlib${ii}$_slacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_slaqz4

     pure module subroutine stdlib${ii}$_dlaqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_desired, sr, &
     !! DLAQZ4 Executes a single multishift QZ sweep
               si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info )
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_desired, ldqc, ldzc
           real(dp), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( &
                     ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * )
           integer(${ik}$), intent( out ) :: info
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(dp) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap
           info = 0_${ik}$
           if ( nblock_desired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_desired
              return
           else if ( lwork < n*nblock_desired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ4', -info )
              return
           end if
           ! executable statements
           if ( nshifts < 2_${ik}$ ) then
              return
           end if
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ! shuffle shifts into pairs of real shifts and pairs
           ! of complex conjugate shifts assuming complex
           ! conjugate shifts are already adjacent to one
           ! another
           do i = 1, nshifts-2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
                 swap = ss( i )
                 ss( i ) = ss( i+1 )
                 ss( i+1 ) = ss( i+2 )
                 ss( i+2 ) = swap
              end if
           end do
           ! nshfts is supposed to be even, but if it is odd,
           ! then simply reduce it by one.  the shuffle above
           ! ensures that the dropped shift is real and that
           ! the remaining shifts are paired.
           ns = nshifts-mod( nshifts, 2_${ik}$ )
           npos = max( nblock_desired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_dlaset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc )
           call stdlib${ii}$_dlaset( 'FULL', ns, ns, zero, one, zc, ldzc )
           do i = 1, ns, 2
              ! introduce the shift
              call stdlib${ii}$_dlaqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( &
                        i ), ss( i ), ss( i+1 ), v )
              temp = v( 2_${ik}$ )
              call stdlib${ii}$_dlartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) )
              call stdlib${ii}$_dlartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_drot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 )
              call stdlib${ii}$_drot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 )
              call stdlib${ii}$_drot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 )
              call stdlib${ii}$_drot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 )
              call stdlib${ii}$_drot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_drot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 )
              ! chase the shift down
              do j = 1, ns-1-i
                 call stdlib${ii}$_dlaqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns &
                        ), lda, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns &
                        ), ldb, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                        zero, work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, &
                        zero, work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_dlaset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc )
              call stdlib${ii}$_dlaset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -2
                 do j = 0, np-1
                    ! move down the block with index k+i+j-1, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_dlaqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, zero, work,sheight )
                 call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, zero, work,sheight )
                 call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           zero, work, n )
                 call stdlib${ii}$_dlacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           zero, work, n )
                 call stdlib${ii}$_dlacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_dlaset( 'FULL', ns, ns, zero, one, qc, ldqc )
           call stdlib${ii}$_dlaset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns, 2
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i-1, ihi-2
                 call stdlib${ii}$_dlaqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_dgemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, &
                        work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_dgemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_dlacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
              call stdlib${ii}$_dgemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, zero,&
                         work, n )
              call stdlib${ii}$_dlacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_dlaqz4

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$laqz4( ilschur, ilq, ilz, n, ilo, ihi, nshifts,nblock_qesired, sr, &
     !! DLAQZ4: Executes a single multishift QZ sweep
               si, ss, a, lda, b, ldb, q,ldq, z, ldz, qc, ldqc, zc, ldzc, work, lwork,info )
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! function arguments
           logical(lk), intent( in ) :: ilschur, ilq, ilz
           integer(${ik}$), intent( in ) :: n, ilo, ihi, lda, ldb, ldq, ldz, lwork,nshifts, &
                     nblock_qesired, ldqc, ldzc
           real(${rk}$), intent( inout ) :: a( lda, * ), b( ldb, * ),q( ldq, * ), z( ldz, * ), qc( &
                     ldqc, * ),zc( ldzc, * ), work( * ), sr( * ), si( * ),ss( * )
           integer(${ik}$), intent( out ) :: info
           ! ================================================================
           ! local scalars
           integer(${ik}$) :: i, j, ns, istartm, istopm, sheight, swidth, k, np, istartb, istopb, &
                     ishift, nblock, npos
           real(${rk}$) :: temp, v(3_${ik}$), c1, s1, c2, s2, swap
           info = 0_${ik}$
           if ( nblock_qesired < nshifts+1 ) then
              info = -8_${ik}$
           end if
           if ( lwork ==-1_${ik}$ ) then
              ! workspace query, quick return
              work( 1_${ik}$ ) = n*nblock_qesired
              return
           else if ( lwork < n*nblock_qesired ) then
              info = -25_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DLAQZ4', -info )
              return
           end if
           ! executable statements
           if ( nshifts < 2_${ik}$ ) then
              return
           end if
           if ( ilo >= ihi ) then
              return
           end if
           if ( ilschur ) then
              istartm = 1_${ik}$
              istopm = n
           else
              istartm = ilo
              istopm = ihi
           end if
           ! shuffle shifts into pairs of real shifts and pairs
           ! of complex conjugate shifts assuming complex
           ! conjugate shifts are already adjacent to one
           ! another
           do i = 1, nshifts-2, 2
              if( si( i )/=-si( i+1 ) ) then
                 swap = sr( i )
                 sr( i ) = sr( i+1 )
                 sr( i+1 ) = sr( i+2 )
                 sr( i+2 ) = swap
                 swap = si( i )
                 si( i ) = si( i+1 )
                 si( i+1 ) = si( i+2 )
                 si( i+2 ) = swap
                 swap = ss( i )
                 ss( i ) = ss( i+1 )
                 ss( i+1 ) = ss( i+2 )
                 ss( i+2 ) = swap
              end if
           end do
           ! nshfts is supposed to be even, but if it is odd,
           ! then simply reduce it by one.  the shuffle above
           ! ensures that the dropped shift is real and that
           ! the remaining shifts are paired.
           ns = nshifts-mod( nshifts, 2_${ik}$ )
           npos = max( nblock_qesired-ns, 1_${ik}$ )
           ! the following block introduces the shifts and chases
           ! them down one by one just enough to make space for
           ! the other shifts. the near-the-diagonal block is
           ! of size (ns+1) x ns.
           call stdlib${ii}$_${ri}$laset( 'FULL', ns+1, ns+1, zero, one, qc, ldqc )
           call stdlib${ii}$_${ri}$laset( 'FULL', ns, ns, zero, one, zc, ldzc )
           do i = 1, ns, 2
              ! introduce the shift
              call stdlib${ii}$_${ri}$laqz1( a( ilo, ilo ), lda, b( ilo, ilo ), ldb, sr( i ),sr( i+1 ), si( &
                        i ), ss( i ), ss( i+1 ), v )
              temp = v( 2_${ik}$ )
              call stdlib${ii}$_${ri}$lartg( temp, v( 3_${ik}$ ), c1, s1, v( 2_${ik}$ ) )
              call stdlib${ii}$_${ri}$lartg( v( 1_${ik}$ ), v( 2_${ik}$ ), c2, s2, temp )
              call stdlib${ii}$_${ri}$rot( ns, a( ilo+1, ilo ), lda, a( ilo+2, ilo ), lda, c1,s1 )
              call stdlib${ii}$_${ri}$rot( ns, a( ilo, ilo ), lda, a( ilo+1, ilo ), lda, c2,s2 )
              call stdlib${ii}$_${ri}$rot( ns, b( ilo+1, ilo ), ldb, b( ilo+2, ilo ), ldb, c1,s1 )
              call stdlib${ii}$_${ri}$rot( ns, b( ilo, ilo ), ldb, b( ilo+1, ilo ), ldb, c2,s2 )
              call stdlib${ii}$_${ri}$rot( ns+1, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 3_${ik}$ ), 1_${ik}$, c1, s1 )
              call stdlib${ii}$_${ri}$rot( ns+1, qc( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, qc( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, c2, s2 )
              ! chase the shift down
              do j = 1, ns-1-i
                 call stdlib${ii}$_${ri}$laqz2( .true., .true., j, 1_${ik}$, ns, ihi-ilo+1, a( ilo,ilo ), lda, b( &
                           ilo, ilo ), ldb, ns+1, 1_${ik}$, qc,ldqc, ns, 1_${ik}$, zc, ldzc )
              end do
           end do
           ! update the rest of the pencil
           ! update a(ilo:ilo+ns,ilo+ns:istopm) and b(ilo:ilo+ns,ilo+ns:istopm)
           ! from the left with qc(1:ns+1,1:ns+1)'
           sheight = ns+1
           swidth = istopm-( ilo+ns )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ilo, ilo+ns &
                        ), lda, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( ilo,ilo+ns ), lda )
                        
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ilo, ilo+ns &
                        ), ldb, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( ilo,ilo+ns ), ldb )
                        
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, sheight, sheight, one, q( 1_${ik}$, ilo ),ldq, qc, ldqc, &
                        zero, work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, sheight, work, n, q( 1_${ik}$, ilo ), ldq )
           end if
           ! update a(istartm:ilo-1,ilo:ilo+ns-1) and b(istartm:ilo-1,ilo:ilo+ns-1)
           ! from the right with zc(1:ns,1:ns)
           sheight = ilo-1-istartm+1
           swidth = ns
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ilo ), lda, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ilo ), lda )
                        
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ilo ), ldb, &
                        zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ilo ), ldb )
                        
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, swidth, swidth, one, z( 1_${ik}$, ilo ), ldz,zc, ldzc, &
                        zero, work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, swidth, work, n, z( 1_${ik}$, ilo ), ldz )
           end if
           ! the following block chases the shifts down to the bottom
           ! right block. if possible, a shift is moved down npos
           ! positions at a time
           k = ilo
           do while ( k < ihi-ns )
              np = min( ihi-ns-k, npos )
              ! size of the near-the-diagonal block
              nblock = ns+np
              ! istartb points to the first row we will be updating
              istartb = k+1
              ! istopb points to the last column we will be updating
              istopb = k+nblock-1
              call stdlib${ii}$_${ri}$laset( 'FULL', ns+np, ns+np, zero, one, qc, ldqc )
              call stdlib${ii}$_${ri}$laset( 'FULL', ns+np, ns+np, zero, one, zc, ldzc )
              ! near the diagonal shift chase
              do i = ns-1, 0, -2
                 do j = 0, np-1
                    ! move down the block with index k+i+j-1, updating
                    ! the (ns+np x ns+np) block:
                    ! (k:k+ns+np,k:k+ns+np-1)
                    call stdlib${ii}$_${ri}$laqz2( .true., .true., k+i+j-1, istartb, istopb,ihi, a, lda, b, &
                              ldb, nblock, k+1, qc, ldqc,nblock, k, zc, ldzc )
                 end do
              end do
              ! update rest of the pencil
              ! update a(k+1:k+ns+np, k+ns+np:istopm) and
              ! b(k+1:k+ns+np, k+ns+np:istopm)
              ! from the left with qc(1:ns+np,1:ns+np)'
              sheight = ns+np
              swidth = istopm-( k+ns+np )+1_${ik}$
              if ( swidth > 0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, a( k+1, k+&
                           ns+np ), lda, zero, work,sheight )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( k+1,k+ns+np ), lda &
                           )
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc,ldqc, b( k+1, k+&
                           ns+np ), ldb, zero, work,sheight )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( k+1,k+ns+np ), ldb &
                           )
              end if
              if ( ilq ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, nblock, nblock, one, q( 1_${ik}$, k+1 ),ldq, qc, ldqc, &
                           zero, work, n )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', n, nblock, work, n, q( 1_${ik}$, k+1 ), ldq )
              end if
              ! update a(istartm:k,k:k+ns+npos-1) and b(istartm:k,k:k+ns+npos-1)
              ! from the right with zc(1:ns+np,1:ns+np)
              sheight = k-istartm+1
              swidth = nblock
              if ( sheight > 0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one,a( istartm, k ), lda, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,a( istartm, k ), lda )
                           
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one,b( istartm, k ), ldb, &
                           zc, ldzc, zero, work,sheight )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,b( istartm, k ), ldb )
                           
              end if
              if ( ilz ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, nblock, nblock, one, z( 1_${ik}$, k ),ldz, zc, ldzc, &
                           zero, work, n )
                 call stdlib${ii}$_${ri}$lacpy( 'ALL', n, nblock, work, n, z( 1_${ik}$, k ), ldz )
              end if
              k = k+np
           end do
           ! the following block removes the shifts from the bottom right corner
           ! one by one. updates are initially applied to a(ihi-ns+1:ihi,ihi-ns:ihi).
           call stdlib${ii}$_${ri}$laset( 'FULL', ns, ns, zero, one, qc, ldqc )
           call stdlib${ii}$_${ri}$laset( 'FULL', ns+1, ns+1, zero, one, zc, ldzc )
           ! istartb points to the first row we will be updating
           istartb = ihi-ns+1
           ! istopb points to the last column we will be updating
           istopb = ihi
           do i = 1, ns, 2
              ! chase the shift down to the bottom right corner
              do ishift = ihi-i-1, ihi-2
                 call stdlib${ii}$_${ri}$laqz2( .true., .true., ishift, istartb, istopb, ihi,a, lda, b, ldb, &
                           ns, ihi-ns+1, qc, ldqc, ns+1,ihi-ns, zc, ldzc )
              end do
           end do
           ! update rest of the pencil
           ! update a(ihi-ns+1:ihi, ihi+1:istopm)
           ! from the left with qc(1:ns,1:ns)'
           sheight = ns
           swidth = istopm-( ihi+1 )+1_${ik}$
           if ( swidth > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,a( ihi-ns+1, &
                        ihi+1 ), lda, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,a( ihi-ns+1, ihi+1 ), lda &
                        )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', sheight, swidth, sheight, one, qc, ldqc,b( ihi-ns+1, &
                        ihi+1 ), ldb, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight,b( ihi-ns+1, ihi+1 ), ldb &
                        )
           end if
           if ( ilq ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, ns, ns, one, q( 1_${ik}$, ihi-ns+1 ), ldq,qc, ldqc, zero, &
                        work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, ns, work, n, q( 1_${ik}$, ihi-ns+1 ), ldq )
           end if
           ! update a(istartm:ihi-ns,ihi-ns:ihi)
           ! from the right with zc(1:ns+1,1:ns+1)
           sheight = ihi-ns-istartm+1
           swidth = ns+1
           if ( sheight > 0_${ik}$ ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, a( istartm,ihi-ns ), lda,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, a( istartm,ihi-ns ), lda &
                        )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', sheight, swidth, swidth, one, b( istartm,ihi-ns ), ldb,&
                         zc, ldzc, zero, work, sheight )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', sheight, swidth, work, sheight, b( istartm,ihi-ns ), ldb &
                        )
           end if
           if ( ilz ) then
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, ns+1, ns+1, one, z( 1_${ik}$, ihi-ns ), ldz,zc, ldzc, zero,&
                         work, n )
              call stdlib${ii}$_${ri}$lacpy( 'ALL', n, ns+1, work, n, z( 1_${ik}$, ihi-ns ), ldz )
           end if
     end subroutine stdlib${ii}$_${ri}$laqz4

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_eigv_gen3