stdlib_lapack_eigv_comp2.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_comp2
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     pure module subroutine stdlib${ii}$_stgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, &
     !! STGSEN reorders the generalized real Schur decomposition of a real
     !! matrix pair (A, B) (in terms of an orthonormal equivalence trans-
     !! formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the upper quasi-triangular
     !! matrix A and the upper triangular B. The leading columns of Q and
     !! Z form orthonormal bases of the corresponding left and right eigen-
     !! spaces (deflating subspaces). (A, B) must be in generalized real
     !! Schur canonical form (as returned by SGGES), i.e. A is block upper
     !! triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper
     !! triangular.
     !! STGSEN also computes the generalized eigenvalues
     !! w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, STGSEN computes the estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(sp), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(sp), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2
           real(sp) :: dscale, dsum, eps, rdscal, smlnum
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -14_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSEN', -info )
              return
           end if
           ! get machine constants
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           pair = .false.
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )==zero ) then
                       if( select( k ) )m = m + 1_${ik}$
                    else
                       pair = .true.
                       if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                    end if
                 else
                    if( select( n ) )m = m + 1_${ik}$
                 end if
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 2_${ik}$*m*(n-m) )
              liwmin = max( 1_${ik}$, n+6 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 4_${ik}$*m*(n-m) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*(n-m), n+6 )
           else
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16 )
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -22_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_slassq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_slassq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 60
           end if
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           pair = .false.
           loop_30: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 swap = select( k )
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                       swap = swap .or. select( k+1 )
                    end if
                 end if
                 if( swap ) then
                    ks = ks + 1_${ik}$
                    ! swap the k-th block to position ks.
                    ! perform the reordering of diagonal blocks in (a, b)
                    ! by orthogonal transformation matrices and update
                    ! q and z accordingly (if requested):
                    kk = k
                    if( k/=ks )call stdlib${ii}$_stgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz,&
                               kk, ks, work, lwork, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! swap is rejected: exit.
                       info = 1_${ik}$
                       if( wantp ) then
                          pl = zero
                          pr = zero
                       end if
                       if( wantd ) then
                          dif( 1_${ik}$ ) = zero
                          dif( 2_${ik}$ ) = zero
                       end if
                       go to 60
                    end if
                    if( pair )ks = ks + 1_${ik}$
                 end if
              end if
           end do loop_30
           if( wantp ) then
              ! solve generalized sylvester equation for r and l
              ! and compute pl and pr.
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              ijb = 0_${ik}$
              call stdlib${ii}$_slacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_slacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              call stdlib${ii}$_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto left
              ! and right eigenspaces.
              rdscal = zero
              dsum = one
              call stdlib${ii}$_slassq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_slassq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates of difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu-estimate.
                 call stdlib${ii}$_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl-estimate.
                 call stdlib${ii}$_stgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_slacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_slacn2( mn2, work( mn2+1 ), work, iwork, dif( 1_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_stgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_stgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_slacn2( mn2, work( mn2+1 ), work, iwork, dif( 2_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_stgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_stgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           60 continue
           ! compute generalized eigenvalues of reordered pair (a, b) and
           ! normalize the generalized schur form.
           pair = .false.
           loop_70: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                    end if
                 end if
                 if( pair ) then
                   ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_slag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta( k ),beta( k+1 ), &
                              alphar( k ), alphar( k+1 ),alphai( k ) )
                    alphai( k+1 ) = -alphai( k )
                 else
                    if( sign( one, b( k, k ) )<zero ) then
                       ! if b(k,k) is negative, make it positive
                       do i = 1, n
                          a( k, i ) = -a( k, i )
                          b( k, i ) = -b( k, i )
                          if( wantq ) q( i, k ) = -q( i, k )
                       end do
                    end if
                    alphar( k ) = a( k, k )
                    alphai( k ) = zero
                    beta( k ) = b( k, k )
                 end if
              end if
           end do loop_70
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_stgsen

     pure module subroutine stdlib${ii}$_dtgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, &
     !! DTGSEN reorders the generalized real Schur decomposition of a real
     !! matrix pair (A, B) (in terms of an orthonormal equivalence trans-
     !! formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the upper quasi-triangular
     !! matrix A and the upper triangular B. The leading columns of Q and
     !! Z form orthonormal bases of the corresponding left and right eigen-
     !! spaces (deflating subspaces). (A, B) must be in generalized real
     !! Schur canonical form (as returned by DGGES), i.e. A is block upper
     !! triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper
     !! triangular.
     !! DTGSEN also computes the generalized eigenvalues
     !! w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, DTGSEN computes the estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(dp), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(dp), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2
           real(dp) :: dscale, dsum, eps, rdscal, smlnum
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -14_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSEN', -info )
              return
           end if
           ! get machine constants
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           pair = .false.
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )==zero ) then
                       if( select( k ) )m = m + 1_${ik}$
                    else
                       pair = .true.
                       if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                    end if
                 else
                    if( select( n ) )m = m + 1_${ik}$
                 end if
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 2_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, n+6 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 4_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ), n+6 )
           else
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16 )
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -22_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_dlassq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_dlassq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 60
           end if
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           pair = .false.
           loop_30: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 swap = select( k )
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                       swap = swap .or. select( k+1 )
                    end if
                 end if
                 if( swap ) then
                    ks = ks + 1_${ik}$
                    ! swap the k-th block to position ks.
                    ! perform the reordering of diagonal blocks in (a, b)
                    ! by orthogonal transformation matrices and update
                    ! q and z accordingly (if requested):
                    kk = k
                    if( k/=ks )call stdlib${ii}$_dtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz,&
                               kk, ks, work, lwork, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! swap is rejected: exit.
                       info = 1_${ik}$
                       if( wantp ) then
                          pl = zero
                          pr = zero
                       end if
                       if( wantd ) then
                          dif( 1_${ik}$ ) = zero
                          dif( 2_${ik}$ ) = zero
                       end if
                       go to 60
                    end if
                    if( pair )ks = ks + 1_${ik}$
                 end if
              end if
           end do loop_30
           if( wantp ) then
              ! solve generalized sylvester equation for r and l
              ! and compute pl and pr.
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              ijb = 0_${ik}$
              call stdlib${ii}$_dlacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_dlacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              call stdlib${ii}$_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto left
              ! and right eigenspaces.
              rdscal = zero
              dsum = one
              call stdlib${ii}$_dlassq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_dlassq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates of difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu-estimate.
                 call stdlib${ii}$_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl-estimate.
                 call stdlib${ii}$_dtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_dlacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_dlacn2( mn2, work( mn2+1 ), work, iwork, dif( 1_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_dtgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_dtgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_dlacn2( mn2, work( mn2+1 ), work, iwork, dif( 2_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_dtgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_dtgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           60 continue
           ! compute generalized eigenvalues of reordered pair (a, b) and
           ! normalize the generalized schur form.
           pair = .false.
           loop_80: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                    end if
                 end if
                 if( pair ) then
                   ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_dlag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta( k ),beta( k+1 ), &
                              alphar( k ), alphar( k+1 ),alphai( k ) )
                    alphai( k+1 ) = -alphai( k )
                 else
                    if( sign( one, b( k, k ) )<zero ) then
                       ! if b(k,k) is negative, make it positive
                       do i = 1, n
                          a( k, i ) = -a( k, i )
                          b( k, i ) = -b( k, i )
                          if( wantq ) q( i, k ) = -q( i, k )
                       end do
                    end if
                    alphar( k ) = a( k, k )
                    alphai( k ) = zero
                    beta( k ) = b( k, k )
                 end if
              end if
           end do loop_80
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_dtgsen

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alphar, alphai, &
     !! DTGSEN: reorders the generalized real Schur decomposition of a real
     !! matrix pair (A, B) (in terms of an orthonormal equivalence trans-
     !! formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the upper quasi-triangular
     !! matrix A and the upper triangular B. The leading columns of Q and
     !! Z form orthonormal bases of the corresponding left and right eigen-
     !! spaces (deflating subspaces). (A, B) must be in generalized real
     !! Schur canonical form (as returned by DGGES), i.e. A is block upper
     !! triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper
     !! triangular.
     !! DTGSEN also computes the generalized eigenvalues
     !! w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, DTGSEN computes the estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               beta, q, ldq, z, ldz, m, pl,pr, dif, work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(${rk}$), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(${rk}$), intent(out) :: alphai(*), alphar(*), beta(*), dif(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, kk, ks, liwmin, lwmin, mn2, n1, n2
           real(${rk}$) :: dscale, dsum, eps, rdscal, smlnum
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -14_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -16_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSEN', -info )
              return
           end if
           ! get machine constants
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / eps
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           pair = .false.
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )==zero ) then
                       if( select( k ) )m = m + 1_${ik}$
                    else
                       pair = .true.
                       if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                    end if
                 else
                    if( select( n ) )m = m + 1_${ik}$
                 end if
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 2_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, n+6 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16, 4_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ), n+6 )
           else
              lwmin = max( 1_${ik}$, 4_${ik}$*n+16 )
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -22_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -24_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_${ri}$lassq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_${ri}$lassq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 60
           end if
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           pair = .false.
           loop_30: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 swap = select( k )
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                       swap = swap .or. select( k+1 )
                    end if
                 end if
                 if( swap ) then
                    ks = ks + 1_${ik}$
                    ! swap the k-th block to position ks.
                    ! perform the reordering of diagonal blocks in (a, b)
                    ! by orthogonal transformation matrices and update
                    ! q and z accordingly (if requested):
                    kk = k
                    if( k/=ks )call stdlib${ii}$_${ri}$tgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz,&
                               kk, ks, work, lwork, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! swap is rejected: exit.
                       info = 1_${ik}$
                       if( wantp ) then
                          pl = zero
                          pr = zero
                       end if
                       if( wantd ) then
                          dif( 1_${ik}$ ) = zero
                          dif( 2_${ik}$ ) = zero
                       end if
                       go to 60
                    end if
                    if( pair )ks = ks + 1_${ik}$
                 end if
              end if
           end do loop_30
           if( wantp ) then
              ! solve generalized sylvester equation for r and l
              ! and compute pl and pr.
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              ijb = 0_${ik}$
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              call stdlib${ii}$_${ri}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto left
              ! and right eigenspaces.
              rdscal = zero
              dsum = one
              call stdlib${ii}$_${ri}$lassq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_${ri}$lassq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates of difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu-estimate.
                 call stdlib${ii}$_${ri}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl-estimate.
                 call stdlib${ii}$_${ri}$tgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( 2_${ik}$*n1*n2+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_${ri}$lacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_${ri}$lacn2( mn2, work( mn2+1 ), work, iwork, dif( 1_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_${ri}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_${ri}$tgsyl( 'T', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_${ri}$lacn2( mn2, work( mn2+1 ), work, iwork, dif( 2_${ik}$ ),kase, isave )
                           
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation.
                       call stdlib${ii}$_${ri}$tgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_${ri}$tgsyl( 'T', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( 2_${ik}$*n1*n2+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           60 continue
           ! compute generalized eigenvalues of reordered pair (a, b) and
           ! normalize the generalized schur form.
           pair = .false.
           loop_80: do k = 1, n
              if( pair ) then
                 pair = .false.
              else
                 if( k<n ) then
                    if( a( k+1, k )/=zero ) then
                       pair = .true.
                    end if
                 end if
                 if( pair ) then
                   ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_${ri}$lag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta( k ),beta( k+1 ), &
                              alphar( k ), alphar( k+1 ),alphai( k ) )
                    alphai( k+1 ) = -alphai( k )
                 else
                    if( sign( one, b( k, k ) )<zero ) then
                       ! if b(k,k) is negative, make it positive
                       do i = 1, n
                          a( k, i ) = -a( k, i )
                          b( k, i ) = -b( k, i )
                          if( wantq ) q( i, k ) = -q( i, k )
                       end do
                    end if
                    alphar( k ) = a( k, k )
                    alphai( k ) = zero
                    beta( k ) = b( k, k )
                 end if
              end if
           end do loop_80
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_${ri}$tgsen

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, &
     !! CTGSEN reorders the generalized Schur decomposition of a complex
     !! matrix pair (A, B) (in terms of an unitary equivalence trans-
     !! formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the pair (A,B). The leading
     !! columns of Q and Z form unitary bases of the corresponding left and
     !! right eigenspaces (deflating subspaces). (A, B) must be in
     !! generalized Schur canonical form, that is, A and B are both upper
     !! triangular.
     !! CTGSEN also computes the generalized eigenvalues
     !! w(j)= ALPHA(j) / BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, the routine computes estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(sp), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(out) :: dif(*)
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           complex(sp), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2
           real(sp) :: dscale, dsum, rdscal, safmin
           complex(sp) :: temp1, temp2
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -13_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSEN', -info )
              return
           end if
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              if( k<n ) then
                 if( select( k ) )m = m + 1_${ik}$
              else
                 if( select( n ) )m = m + 1_${ik}$
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 2_${ik}$*m*(n-m) )
              liwmin = max( 1_${ik}$, n+2 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*m*(n-m) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*(n-m), n+2 )
           else
              lwmin = 1_${ik}$
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -21_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -23_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_classq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_classq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 70
           end if
           ! get machine constant
           safmin = stdlib${ii}$_slamch( 'S' )
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           do k = 1, n
              swap = select( k )
              if( swap ) then
                 ks = ks + 1_${ik}$
                 ! swap the k-th block to position ks. compute unitary q
                 ! and z that will swap adjacent diagonal blocks in (a, b).
                 if( k/=ks )call stdlib${ii}$_ctgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, k,&
                            ks, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! swap is rejected: exit.
                    info = 1_${ik}$
                    if( wantp ) then
                       pl = zero
                       pr = zero
                    end if
                    if( wantd ) then
                       dif( 1_${ik}$ ) = zero
                       dif( 2_${ik}$ ) = zero
                    end if
                    go to 70
                 end if
              end if
           end do
           if( wantp ) then
              ! solve generalized sylvester equation for r and l:
                         ! a11 * r - l * a22 = a12
                         ! b11 * r - l * b22 = b12
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              call stdlib${ii}$_clacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_clacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              ijb = 0_${ik}$
              call stdlib${ii}$_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto
              ! left and right eigenspaces
              rdscal = zero
              dsum = one
              call stdlib${ii}$_classq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_classq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu estimate.
                 call stdlib${ii}$_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl estimate.
                 call stdlib${ii}$_ctgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_clacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_clacn2( mn2, work( mn2+1 ), work, dif( 1_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_ctgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_ctgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_clacn2( mn2, work( mn2+1 ), work, dif( 2_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_ctgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_ctgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           ! if b(k,k) is complex, make it real and positive (normalization
           ! of the generalized schur form) and store the generalized
           ! eigenvalues of reordered pair (a, b)
           do k = 1, n
              dscale = abs( b( k, k ) )
              if( dscale>safmin ) then
                 temp1 = conjg( b( k, k ) / dscale )
                 temp2 = b( k, k ) / dscale
                 b( k, k ) = dscale
                 call stdlib${ii}$_cscal( n-k, temp1, b( k, k+1 ), ldb )
                 call stdlib${ii}$_cscal( n-k+1, temp1, a( k, k ), lda )
                 if( wantq )call stdlib${ii}$_cscal( n, temp2, q( 1_${ik}$, k ), 1_${ik}$ )
              else
                 b( k, k ) = cmplx( zero, zero,KIND=sp)
              end if
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
           end do
           70 continue
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_ctgsen

     pure module subroutine stdlib${ii}$_ztgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, &
     !! ZTGSEN reorders the generalized Schur decomposition of a complex
     !! matrix pair (A, B) (in terms of an unitary equivalence trans-
     !! formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the pair (A,B). The leading
     !! columns of Q and Z form unitary bases of the corresponding left and
     !! right eigenspaces (deflating subspaces). (A, B) must be in
     !! generalized Schur canonical form, that is, A and B are both upper
     !! triangular.
     !! ZTGSEN also computes the generalized eigenvalues
     !! w(j)= ALPHA(j) / BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, the routine computes estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(dp), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(out) :: dif(*)
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           complex(dp), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2
           real(dp) :: dscale, dsum, rdscal, safmin
           complex(dp) :: temp1, temp2
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -13_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSEN', -info )
              return
           end if
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              if( k<n ) then
                 if( select( k ) )m = m + 1_${ik}$
              else
                 if( select( n ) )m = m + 1_${ik}$
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, n+2 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ), n+2 )
           else
              lwmin = 1_${ik}$
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -21_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -23_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_zlassq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_zlassq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 70
           end if
           ! get machine constant
           safmin = stdlib${ii}$_dlamch( 'S' )
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           do k = 1, n
              swap = select( k )
              if( swap ) then
                 ks = ks + 1_${ik}$
                 ! swap the k-th block to position ks. compute unitary q
                 ! and z that will swap adjacent diagonal blocks in (a, b).
                 if( k/=ks )call stdlib${ii}$_ztgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, k,&
                            ks, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! swap is rejected: exit.
                    info = 1_${ik}$
                    if( wantp ) then
                       pl = zero
                       pr = zero
                    end if
                    if( wantd ) then
                       dif( 1_${ik}$ ) = zero
                       dif( 2_${ik}$ ) = zero
                    end if
                    go to 70
                 end if
              end if
           end do
           if( wantp ) then
              ! solve generalized sylvester equation for r and l:
                         ! a11 * r - l * a22 = a12
                         ! b11 * r - l * b22 = b12
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              call stdlib${ii}$_zlacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_zlacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              ijb = 0_${ik}$
              call stdlib${ii}$_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto
              ! left and right eigenspaces
              rdscal = zero
              dsum = one
              call stdlib${ii}$_zlassq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_zlassq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu estimate.
                 call stdlib${ii}$_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl estimate.
                 call stdlib${ii}$_ztgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_zlacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_zlacn2( mn2, work( mn2+1 ), work, dif( 1_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_ztgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_ztgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_zlacn2( mn2, work( mn2+1 ), work, dif( 2_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_ztgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_ztgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           ! if b(k,k) is complex, make it real and positive (normalization
           ! of the generalized schur form) and store the generalized
           ! eigenvalues of reordered pair (a, b)
           do k = 1, n
              dscale = abs( b( k, k ) )
              if( dscale>safmin ) then
                 temp1 = conjg( b( k, k ) / dscale )
                 temp2 = b( k, k ) / dscale
                 b( k, k ) = dscale
                 call stdlib${ii}$_zscal( n-k, temp1, b( k, k+1 ), ldb )
                 call stdlib${ii}$_zscal( n-k+1, temp1, a( k, k ), lda )
                 if( wantq )call stdlib${ii}$_zscal( n, temp2, q( 1_${ik}$, k ), 1_${ik}$ )
              else
                 b( k, k ) = cmplx( zero, zero,KIND=dp)
              end if
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
           end do
           70 continue
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_ztgsen

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgsen( ijob, wantq, wantz, select, n, a, lda, b, ldb,alpha, beta, q, &
     !! ZTGSEN: reorders the generalized Schur decomposition of a complex
     !! matrix pair (A, B) (in terms of an unitary equivalence trans-
     !! formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues
     !! appears in the leading diagonal blocks of the pair (A,B). The leading
     !! columns of Q and Z form unitary bases of the corresponding left and
     !! right eigenspaces (deflating subspaces). (A, B) must be in
     !! generalized Schur canonical form, that is, A and B are both upper
     !! triangular.
     !! ZTGSEN also computes the generalized eigenvalues
     !! w(j)= ALPHA(j) / BETA(j)
     !! of the reordered matrix pair (A, B).
     !! Optionally, the routine computes estimates of reciprocal condition
     !! numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
     !! (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
     !! between the matrix pairs (A11, B11) and (A22,B22) that correspond to
     !! the selected cluster and the eigenvalues outside the cluster, resp.,
     !! and norms of "projections" onto left and right eigenspaces w.r.t.
     !! the selected cluster in the (1,1)-block.
               ldq, z, ldz, m, pl, pr, dif,work, lwork, iwork, liwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldq, ldz, liwork, lwork, n
           integer(${ik}$), intent(out) :: info, m
           real(${ck}$), intent(out) :: pl, pr
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${ck}$), intent(out) :: dif(*)
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           complex(${ck}$), intent(out) :: alpha(*), beta(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, swap, wantd, wantd1, wantd2, wantp
           integer(${ik}$) :: i, ierr, ijb, k, kase, ks, liwmin, lwmin, mn2, n1, n2
           real(${ck}$) :: dscale, dsum, rdscal, safmin
           complex(${ck}$) :: temp1, temp2
           ! Local Arrays 
           integer(${ik}$) :: isave(3_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ .or. liwork==-1_${ik}$ )
           if( ijob<0_${ik}$ .or. ijob>5_${ik}$ ) then
              info = -1_${ik}$
           else if( n<0_${ik}$ ) then
              info = -5_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -9_${ik}$
           else if( ldq<1_${ik}$ .or. ( wantq .and. ldq<n ) ) then
              info = -13_${ik}$
           else if( ldz<1_${ik}$ .or. ( wantz .and. ldz<n ) ) then
              info = -15_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSEN', -info )
              return
           end if
           ierr = 0_${ik}$
           wantp = ijob==1_${ik}$ .or. ijob>=4_${ik}$
           wantd1 = ijob==2_${ik}$ .or. ijob==4_${ik}$
           wantd2 = ijob==3_${ik}$ .or. ijob==5_${ik}$
           wantd = wantd1 .or. wantd2
           ! set m to the dimension of the specified pair of deflating
           ! subspaces.
           m = 0_${ik}$
           if( .not.lquery .or. ijob/=0_${ik}$ ) then
           do k = 1, n
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
              if( k<n ) then
                 if( select( k ) )m = m + 1_${ik}$
              else
                 if( select( n ) )m = m + 1_${ik}$
              end if
           end do
           end if
           if( ijob==1_${ik}$ .or. ijob==2_${ik}$ .or. ijob==4_${ik}$ ) then
              lwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, n+2 )
           else if( ijob==3_${ik}$ .or. ijob==5_${ik}$ ) then
              lwmin = max( 1_${ik}$, 4_${ik}$*m*( n-m ) )
              liwmin = max( 1_${ik}$, 2_${ik}$*m*( n-m ), n+2 )
           else
              lwmin = 1_${ik}$
              liwmin = 1_${ik}$
           end if
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           if( lwork<lwmin .and. .not.lquery ) then
              info = -21_${ik}$
           else if( liwork<liwmin .and. .not.lquery ) then
              info = -23_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSEN', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible.
           if( m==n .or. m==0_${ik}$ ) then
              if( wantp ) then
                 pl = one
                 pr = one
              end if
              if( wantd ) then
                 dscale = zero
                 dsum = one
                 do i = 1, n
                    call stdlib${ii}$_${ci}$lassq( n, a( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                    call stdlib${ii}$_${ci}$lassq( n, b( 1_${ik}$, i ), 1_${ik}$, dscale, dsum )
                 end do
                 dif( 1_${ik}$ ) = dscale*sqrt( dsum )
                 dif( 2_${ik}$ ) = dif( 1_${ik}$ )
              end if
              go to 70
           end if
           ! get machine constant
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' )
           ! collect the selected blocks at the top-left corner of (a, b).
           ks = 0_${ik}$
           do k = 1, n
              swap = select( k )
              if( swap ) then
                 ks = ks + 1_${ik}$
                 ! swap the k-th block to position ks. compute unitary q
                 ! and z that will swap adjacent diagonal blocks in (a, b).
                 if( k/=ks )call stdlib${ii}$_${ci}$tgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, k,&
                            ks, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! swap is rejected: exit.
                    info = 1_${ik}$
                    if( wantp ) then
                       pl = zero
                       pr = zero
                    end if
                    if( wantd ) then
                       dif( 1_${ik}$ ) = zero
                       dif( 2_${ik}$ ) = zero
                    end if
                    go to 70
                 end if
              end if
           end do
           if( wantp ) then
              ! solve generalized sylvester equation for r and l:
                         ! a11 * r - l * a22 = a12
                         ! b11 * r - l * b22 = b12
              n1 = m
              n2 = n - m
              i = n1 + 1_${ik}$
              call stdlib${ii}$_${ci}$lacpy( 'FULL', n1, n2, a( 1_${ik}$, i ), lda, work, n1 )
              call stdlib${ii}$_${ci}$lacpy( 'FULL', n1, n2, b( 1_${ik}$, i ), ldb, work( n1*n2+1 ),n1 )
              ijb = 0_${ik}$
              call stdlib${ii}$_${ci}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b( i,&
               i ), ldb, work( n1*n2+1 ), n1,dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-2*n1*n2, &
                         iwork, ierr )
              ! estimate the reciprocal of norms of "projections" onto
              ! left and right eigenspaces
              rdscal = zero
              dsum = one
              call stdlib${ii}$_${ci}$lassq( n1*n2, work, 1_${ik}$, rdscal, dsum )
              pl = rdscal*sqrt( dsum )
              if( pl==zero ) then
                 pl = one
              else
                 pl = dscale / ( sqrt( dscale*dscale / pl+pl )*sqrt( pl ) )
              end if
              rdscal = zero
              dsum = one
              call stdlib${ii}$_${ci}$lassq( n1*n2, work( n1*n2+1 ), 1_${ik}$, rdscal, dsum )
              pr = rdscal*sqrt( dsum )
              if( pr==zero ) then
                 pr = one
              else
                 pr = dscale / ( sqrt( dscale*dscale / pr+pr )*sqrt( pr ) )
              end if
           end if
           if( wantd ) then
              ! compute estimates difu and difl.
              if( wantd1 ) then
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = idifjb
                 ! frobenius norm-based difu estimate.
                 call stdlib${ii}$_${ci}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda, work,n1, b, ldb, b(&
                  i, i ), ldb, work( n1*n2+1 ),n1, dscale, dif( 1_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
                 ! frobenius norm-based difl estimate.
                 call stdlib${ii}$_${ci}$tgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda, work,n2, b( i, i ),&
                  ldb, b, ldb, work( n1*n2+1 ),n2, dscale, dif( 2_${ik}$ ), work( n1*n2*2_${ik}$+1 ),lwork-&
                            2_${ik}$*n1*n2, iwork, ierr )
              else
                 ! compute 1-norm-based estimates of difu and difl using
                 ! reversed communication with stdlib${ii}$_${ci}$lacn2. in each step a
                 ! generalized sylvester equation or a transposed variant
                 ! is solved.
                 kase = 0_${ik}$
                 n1 = m
                 n2 = n - m
                 i = n1 + 1_${ik}$
                 ijb = 0_${ik}$
                 mn2 = 2_${ik}$*n1*n2
                 ! 1-norm-based estimate of difu.
                 40 continue
                 call stdlib${ii}$_${ci}$lacn2( mn2, work( mn2+1 ), work, dif( 1_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_${ci}$tgsyl( 'N', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_${ci}$tgsyl( 'C', ijb, n1, n2, a, lda, a( i, i ), lda,work, n1, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n1, dscale, dif( 1_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 40
                 end if
                 dif( 1_${ik}$ ) = dscale / dif( 1_${ik}$ )
                 ! 1-norm-based estimate of difl.
                 50 continue
                 call stdlib${ii}$_${ci}$lacn2( mn2, work( mn2+1 ), work, dif( 2_${ik}$ ), kase,isave )
                 if( kase/=0_${ik}$ ) then
                    if( kase==1_${ik}$ ) then
                       ! solve generalized sylvester equation
                       call stdlib${ii}$_${ci}$tgsyl( 'N', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b( &
                       i, i ), ldb, b, ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    else
                       ! solve the transposed variant.
                       call stdlib${ii}$_${ci}$tgsyl( 'C', ijb, n2, n1, a( i, i ), lda, a, lda,work, n2, b, &
                       ldb, b( i, i ), ldb,work( n1*n2+1 ), n2, dscale, dif( 2_${ik}$ ),work( n1*n2*2_${ik}$+1 )&
                                 , lwork-2*n1*n2, iwork,ierr )
                    end if
                    go to 50
                 end if
                 dif( 2_${ik}$ ) = dscale / dif( 2_${ik}$ )
              end if
           end if
           ! if b(k,k) is complex, make it real and positive (normalization
           ! of the generalized schur form) and store the generalized
           ! eigenvalues of reordered pair (a, b)
           do k = 1, n
              dscale = abs( b( k, k ) )
              if( dscale>safmin ) then
                 temp1 = conjg( b( k, k ) / dscale )
                 temp2 = b( k, k ) / dscale
                 b( k, k ) = dscale
                 call stdlib${ii}$_${ci}$scal( n-k, temp1, b( k, k+1 ), ldb )
                 call stdlib${ii}$_${ci}$scal( n-k+1, temp1, a( k, k ), lda )
                 if( wantq )call stdlib${ii}$_${ci}$scal( n, temp2, q( 1_${ik}$, k ), 1_${ik}$ )
              else
                 b( k, k ) = cmplx( zero, zero,KIND=${ck}$)
              end if
              alpha( k ) = a( k, k )
              beta( k ) = b( k, k )
           end do
           70 continue
           work( 1_${ik}$ ) = lwmin
           iwork( 1_${ik}$ ) = liwmin
           return
     end subroutine stdlib${ii}$_${ci}$tgsen

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! STGSNA estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B) in
     !! generalized real Schur canonical form (or of any matrix pair
     !! (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where
     !! Z**T denotes the transpose of Z.
     !! (A, B) must be in generalized real Schur form (as returned by SGGES),
     !! i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal
     !! blocks. B is upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           real(sp), intent(out) :: dif(*), s(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: difdri = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2
           real(sp) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, &
                     scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi
           ! Local Arrays 
           real(sp) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 pair = .false.
                 do k = 1, n
                    if( pair ) then
                       pair = .false.
                    else
                       if( k<n ) then
                          if( a( k+1, k )==zero ) then
                             if( select( k ) )m = m + 1_${ik}$
                          else
                             pair = .true.
                             if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                          end if
                       else
                          if( select( n ) )m = m + 1_${ik}$
                       end if
                    end if
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*( n + 2_${ik}$ ) + 16_${ik}$
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           ks = 0_${ik}$
           pair = .false.
           loop_20: do k = 1, n
              ! determine whether a(k,k) begins a 1-by-1 or 2-by-2 block.
              if( pair ) then
                 pair = .false.
                 cycle loop_20
              else
                 if( k<n )pair = a( k+1, k )/=zero
              end if
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( pair ) then
                    if( .not.select( k ) .and. .not.select( k+1 ) )cycle loop_20
                 else
                    if( .not.select( k ) )cycle loop_20
                 end if
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 if( pair ) then
                    ! complex eigenvalue pair.
                    rnrm = stdlib${ii}$_slapy2( stdlib${ii}$_snrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_snrm2( n, vr( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    lnrm = stdlib${ii}$_slapy2( stdlib${ii}$_snrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_snrm2( n, vl( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    call stdlib${ii}$_sgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhav = tmprr + tmpii
                    uhavi = tmpir - tmpri
                    call stdlib${ii}$_sgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhbv = tmprr + tmpii
                    uhbvi = tmpir - tmpri
                    uhav = stdlib${ii}$_slapy2( uhav, uhavi )
                    uhbv = stdlib${ii}$_slapy2( uhbv, uhbvi )
                    cond = stdlib${ii}$_slapy2( uhav, uhbv )
                    s( ks ) = cond / ( rnrm*lnrm )
                    s( ks+1 ) = s( ks )
                 else
                    ! real eigenvalue.
                    rnrm = stdlib${ii}$_snrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                    lnrm = stdlib${ii}$_snrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhav = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_sgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhbv = stdlib${ii}$_sdot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    cond = stdlib${ii}$_slapy2( uhav, uhbv )
                    if( cond==zero ) then
                       s( ks ) = -one
                    else
                       s( ks ) = cond / ( rnrm*lnrm )
                    end if
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_slapy2( a( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 1_${ik}$ ) )
                    cycle loop_20
                 end if
                 ! estimate the reciprocal condition number of the k-th
                 ! eigenvectors.
                 if( pair ) then
                    ! copy the  2-by 2 pencil beginning at (a(k,k), b(k, k)).
                    ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_slag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta,dummy1( 1_${ik}$ ), &
                              alphar, dummy( 1_${ik}$ ), alphai )
                    alprqt = one
                    c1 = two*( alphar*alphar+alphai*alphai+beta*beta )
                    c2 = four*beta*beta*alphai*alphai
                    root1 = c1 + sqrt( c1*c1-4.0_sp*c2 )
                    root2 = c2 / root1
                    root1 = root1 / two
                    cond = min( sqrt( root1 ), sqrt( root2 ) )
                 end if
                 ! copy the matrix (a, b) to the array work and swap the
                 ! diagonal block beginning at a(k,k) to the (1,1) position.
                 call stdlib${ii}$_slacpy( 'FULL', n, n, a, lda, work, n )
                 call stdlib${ii}$_slacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                 ifst = k
                 ilst = 1_${ik}$
                 call stdlib${ii}$_stgexc( .false., .false., n, work, n, work( n*n+1 ), n,dummy, 1_${ik}$, &
                           dummy1, 1_${ik}$, ifst, ilst,work( n*n*2_${ik}$+1 ), lwork-2*n*n, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! ill-conditioned problem - swap rejected.
                    dif( ks ) = zero
                 else
                    ! reordering successful, solve generalized sylvester
                    ! equation for r and l,
                               ! a22 * r - l * a11 = a12
                               ! b22 * r - l * b11 = b12,
                    ! and compute estimate of difl((a11,b11), (a22, b22)).
                    n1 = 1_${ik}$
                    if( work( 2_${ik}$ )/=zero )n1 = 2_${ik}$
                    n2 = n - n1
                    if( n2==0_${ik}$ ) then
                       dif( ks ) = cond
                    else
                       i = n*n + 1_${ik}$
                       iz = 2_${ik}$*n*n + 1_${ik}$
                       call stdlib${ii}$_stgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr )
                       if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond )
                    end if
                 end if
                 if( pair )dif( ks+1 ) = dif( ks )
              end if
              if( pair )ks = ks + 1_${ik}$
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_stgsna

     pure module subroutine stdlib${ii}$_dtgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! DTGSNA estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B) in
     !! generalized real Schur canonical form (or of any matrix pair
     !! (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where
     !! Z**T denotes the transpose of Z.
     !! (A, B) must be in generalized real Schur form (as returned by DGGES),
     !! i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal
     !! blocks. B is upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           real(dp), intent(out) :: dif(*), s(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: difdri = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2
           real(dp) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, &
                     scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi
           ! Local Arrays 
           real(dp) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 pair = .false.
                 do k = 1, n
                    if( pair ) then
                       pair = .false.
                    else
                       if( k<n ) then
                          if( a( k+1, k )==zero ) then
                             if( select( k ) )m = m + 1_${ik}$
                          else
                             pair = .true.
                             if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                          end if
                       else
                          if( select( n ) )m = m + 1_${ik}$
                       end if
                    end if
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*( n + 2_${ik}$ ) + 16_${ik}$
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           ks = 0_${ik}$
           pair = .false.
           loop_20: do k = 1, n
              ! determine whether a(k,k) begins a 1-by-1 or 2-by-2 block.
              if( pair ) then
                 pair = .false.
                 cycle loop_20
              else
                 if( k<n )pair = a( k+1, k )/=zero
              end if
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( pair ) then
                    if( .not.select( k ) .and. .not.select( k+1 ) )cycle loop_20
                 else
                    if( .not.select( k ) )cycle loop_20
                 end if
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 if( pair ) then
                    ! complex eigenvalue pair.
                    rnrm = stdlib${ii}$_dlapy2( stdlib${ii}$_dnrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_dnrm2( n, vr( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    lnrm = stdlib${ii}$_dlapy2( stdlib${ii}$_dnrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_dnrm2( n, vl( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    call stdlib${ii}$_dgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhav = tmprr + tmpii
                    uhavi = tmpir - tmpri
                    call stdlib${ii}$_dgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhbv = tmprr + tmpii
                    uhbvi = tmpir - tmpri
                    uhav = stdlib${ii}$_dlapy2( uhav, uhavi )
                    uhbv = stdlib${ii}$_dlapy2( uhbv, uhbvi )
                    cond = stdlib${ii}$_dlapy2( uhav, uhbv )
                    s( ks ) = cond / ( rnrm*lnrm )
                    s( ks+1 ) = s( ks )
                 else
                    ! real eigenvalue.
                    rnrm = stdlib${ii}$_dnrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                    lnrm = stdlib${ii}$_dnrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhav = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_dgemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhbv = stdlib${ii}$_ddot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    cond = stdlib${ii}$_dlapy2( uhav, uhbv )
                    if( cond==zero ) then
                       s( ks ) = -one
                    else
                       s( ks ) = cond / ( rnrm*lnrm )
                    end if
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_dlapy2( a( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 1_${ik}$ ) )
                    cycle loop_20
                 end if
                 ! estimate the reciprocal condition number of the k-th
                 ! eigenvectors.
                 if( pair ) then
                    ! copy the  2-by 2 pencil beginning at (a(k,k), b(k, k)).
                    ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_dlag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta,dummy1( 1_${ik}$ ), &
                              alphar, dummy( 1_${ik}$ ), alphai )
                    alprqt = one
                    c1 = two*( alphar*alphar+alphai*alphai+beta*beta )
                    c2 = four*beta*beta*alphai*alphai
                    root1 = c1 + sqrt( c1*c1-4.0_dp*c2 )
                    root2 = c2 / root1
                    root1 = root1 / two
                    cond = min( sqrt( root1 ), sqrt( root2 ) )
                 end if
                 ! copy the matrix (a, b) to the array work and swap the
                 ! diagonal block beginning at a(k,k) to the (1,1) position.
                 call stdlib${ii}$_dlacpy( 'FULL', n, n, a, lda, work, n )
                 call stdlib${ii}$_dlacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                 ifst = k
                 ilst = 1_${ik}$
                 call stdlib${ii}$_dtgexc( .false., .false., n, work, n, work( n*n+1 ), n,dummy, 1_${ik}$, &
                           dummy1, 1_${ik}$, ifst, ilst,work( n*n*2_${ik}$+1 ), lwork-2*n*n, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! ill-conditioned problem - swap rejected.
                    dif( ks ) = zero
                 else
                    ! reordering successful, solve generalized sylvester
                    ! equation for r and l,
                               ! a22 * r - l * a11 = a12
                               ! b22 * r - l * b11 = b12,
                    ! and compute estimate of difl((a11,b11), (a22, b22)).
                    n1 = 1_${ik}$
                    if( work( 2_${ik}$ )/=zero )n1 = 2_${ik}$
                    n2 = n - n1
                    if( n2==0_${ik}$ ) then
                       dif( ks ) = cond
                    else
                       i = n*n + 1_${ik}$
                       iz = 2_${ik}$*n*n + 1_${ik}$
                       call stdlib${ii}$_dtgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr )
                       if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond )
                    end if
                 end if
                 if( pair )dif( ks+1 ) = dif( ks )
              end if
              if( pair )ks = ks + 1_${ik}$
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_dtgsna

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! DTGSNA: estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B) in
     !! generalized real Schur canonical form (or of any matrix pair
     !! (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where
     !! Z**T denotes the transpose of Z.
     !! (A, B) must be in generalized real Schur form (as returned by DGGES),
     !! i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal
     !! blocks. B is upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           real(${rk}$), intent(out) :: dif(*), s(*), work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: difdri = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, pair, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, iz, k, ks, lwmin, n1, n2
           real(${rk}$) :: alphai, alphar, alprqt, beta, c1, c2, cond, eps, lnrm, rnrm, root1, root2, &
                     scale, smlnum, tmpii, tmpir, tmpri, tmprr, uhav, uhavi, uhbv, uhbvi
           ! Local Arrays 
           real(${rk}$) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 pair = .false.
                 do k = 1, n
                    if( pair ) then
                       pair = .false.
                    else
                       if( k<n ) then
                          if( a( k+1, k )==zero ) then
                             if( select( k ) )m = m + 1_${ik}$
                          else
                             pair = .true.
                             if( select( k ) .or. select( k+1 ) )m = m + 2_${ik}$
                          end if
                       else
                          if( select( n ) )m = m + 1_${ik}$
                       end if
                    end if
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*( n + 2_${ik}$ ) + 16_${ik}$
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / eps
           ks = 0_${ik}$
           pair = .false.
           loop_20: do k = 1, n
              ! determine whether a(k,k) begins a 1-by-1 or 2-by-2 block.
              if( pair ) then
                 pair = .false.
                 cycle loop_20
              else
                 if( k<n )pair = a( k+1, k )/=zero
              end if
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( pair ) then
                    if( .not.select( k ) .and. .not.select( k+1 ) )cycle loop_20
                 else
                    if( .not.select( k ) )cycle loop_20
                 end if
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 if( pair ) then
                    ! complex eigenvalue pair.
                    rnrm = stdlib${ii}$_${ri}$lapy2( stdlib${ii}$_${ri}$nrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_${ri}$nrm2( n, vr( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    lnrm = stdlib${ii}$_${ri}$lapy2( stdlib${ii}$_${ri}$nrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ ),stdlib${ii}$_${ri}$nrm2( n, vl( &
                              1_${ik}$, ks+1 ), 1_${ik}$ ) )
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhav = tmprr + tmpii
                    uhavi = tmpir - tmpri
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    tmprr = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    tmpri = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks+1 ), 1_${ik}$,zero, work, 1_${ik}$ )
                              
                    tmpii = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks+1 ), 1_${ik}$ )
                    tmpir = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    uhbv = tmprr + tmpii
                    uhbvi = tmpir - tmpri
                    uhav = stdlib${ii}$_${ri}$lapy2( uhav, uhavi )
                    uhbv = stdlib${ii}$_${ri}$lapy2( uhbv, uhbvi )
                    cond = stdlib${ii}$_${ri}$lapy2( uhav, uhbv )
                    s( ks ) = cond / ( rnrm*lnrm )
                    s( ks+1 ) = s( ks )
                 else
                    ! real eigenvalue.
                    rnrm = stdlib${ii}$_${ri}$nrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                    lnrm = stdlib${ii}$_${ri}$nrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, a, lda, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhav = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    call stdlib${ii}$_${ri}$gemv( 'N', n, n, one, b, ldb, vr( 1_${ik}$, ks ), 1_${ik}$, zero,work, 1_${ik}$ )
                              
                    uhbv = stdlib${ii}$_${ri}$dot( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                    cond = stdlib${ii}$_${ri}$lapy2( uhav, uhbv )
                    if( cond==zero ) then
                       s( ks ) = -one
                    else
                       s( ks ) = cond / ( rnrm*lnrm )
                    end if
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_${ri}$lapy2( a( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 1_${ik}$ ) )
                    cycle loop_20
                 end if
                 ! estimate the reciprocal condition number of the k-th
                 ! eigenvectors.
                 if( pair ) then
                    ! copy the  2-by 2 pencil beginning at (a(k,k), b(k, k)).
                    ! compute the eigenvalue(s) at position k.
                    work( 1_${ik}$ ) = a( k, k )
                    work( 2_${ik}$ ) = a( k+1, k )
                    work( 3_${ik}$ ) = a( k, k+1 )
                    work( 4_${ik}$ ) = a( k+1, k+1 )
                    work( 5_${ik}$ ) = b( k, k )
                    work( 6_${ik}$ ) = b( k+1, k )
                    work( 7_${ik}$ ) = b( k, k+1 )
                    work( 8_${ik}$ ) = b( k+1, k+1 )
                    call stdlib${ii}$_${ri}$lag2( work, 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, smlnum*eps, beta,dummy1( 1_${ik}$ ), &
                              alphar, dummy( 1_${ik}$ ), alphai )
                    alprqt = one
                    c1 = two*( alphar*alphar+alphai*alphai+beta*beta )
                    c2 = four*beta*beta*alphai*alphai
                    root1 = c1 + sqrt( c1*c1-4.0_${rk}$*c2 )
                    root2 = c2 / root1
                    root1 = root1 / two
                    cond = min( sqrt( root1 ), sqrt( root2 ) )
                 end if
                 ! copy the matrix (a, b) to the array work and swap the
                 ! diagonal block beginning at a(k,k) to the (1,1) position.
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', n, n, a, lda, work, n )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                 ifst = k
                 ilst = 1_${ik}$
                 call stdlib${ii}$_${ri}$tgexc( .false., .false., n, work, n, work( n*n+1 ), n,dummy, 1_${ik}$, &
                           dummy1, 1_${ik}$, ifst, ilst,work( n*n*2_${ik}$+1 ), lwork-2*n*n, ierr )
                 if( ierr>0_${ik}$ ) then
                    ! ill-conditioned problem - swap rejected.
                    dif( ks ) = zero
                 else
                    ! reordering successful, solve generalized sylvester
                    ! equation for r and l,
                               ! a22 * r - l * a11 = a12
                               ! b22 * r - l * b11 = b12,
                    ! and compute estimate of difl((a11,b11), (a22, b22)).
                    n1 = 1_${ik}$
                    if( work( 2_${ik}$ )/=zero )n1 = 2_${ik}$
                    n2 = n - n1
                    if( n2==0_${ik}$ ) then
                       dif( ks ) = cond
                    else
                       i = n*n + 1_${ik}$
                       iz = 2_${ik}$*n*n + 1_${ik}$
                       call stdlib${ii}$_${ri}$tgsyl( 'N', difdri, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ),work( iz+1 ), lwork-2*n*n, iwork, ierr )
                       if( pair )dif( ks ) = min( max( one, alprqt )*dif( ks ),cond )
                    end if
                 end if
                 if( pair )dif( ks+1 ) = dif( ks )
              end if
              if( pair )ks = ks + 1_${ik}$
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_${ri}$tgsna

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! CTGSNA estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B).
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(out) :: dif(*), s(*)
           complex(sp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2
           real(sp) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum
           complex(sp) :: yhax, yhbx
           ! Local Arrays 
           complex(sp) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 do k = 1, n
                    if( select( k ) )m = m + 1_${ik}$
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*n
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           bignum = one / smlnum
           call stdlib${ii}$_slabad( smlnum, bignum )
           ks = 0_${ik}$
           loop_20: do k = 1, n
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( .not.select( k ) )cycle loop_20
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 rnrm = stdlib${ii}$_scnrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                 lnrm = stdlib${ii}$_scnrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_cgemv( 'N', n, n, cmplx( one, zero,KIND=sp), a, lda,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=sp), work, 1_${ik}$ )
                 yhax = stdlib${ii}$_cdotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_cgemv( 'N', n, n, cmplx( one, zero,KIND=sp), b, ldb,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=sp), work, 1_${ik}$ )
                 yhbx = stdlib${ii}$_cdotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 cond = stdlib${ii}$_slapy2( abs( yhax ), abs( yhbx ) )
                 if( cond==zero ) then
                    s( ks ) = -one
                 else
                    s( ks ) = cond / ( rnrm*lnrm )
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_slapy2( abs( a( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 1_${ik}$ ) ) )
                 else
                    ! estimate the reciprocal condition number of the k-th
                    ! eigenvectors.
                    ! copy the matrix (a, b) to the array work and move the
                    ! (k,k)th pair to the (1,1) position.
                    call stdlib${ii}$_clacpy( 'FULL', n, n, a, lda, work, n )
                    call stdlib${ii}$_clacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                    ifst = k
                    ilst = 1_${ik}$
                    call stdlib${ii}$_ctgexc( .false., .false., n, work, n, work( n*n+1 ),n, dummy, 1_${ik}$, &
                              dummy1, 1_${ik}$, ifst, ilst, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! ill-conditioned problem - swap rejected.
                       dif( ks ) = zero
                    else
                       ! reordering successful, solve generalized sylvester
                       ! equation for r and l,
                                  ! a22 * r - l * a11 = a12
                                  ! b22 * r - l * b11 = b12,
                       ! and compute estimate of difl[(a11,b11), (a22, b22)].
                       n1 = 1_${ik}$
                       n2 = n - n1
                       i = n*n + 1_${ik}$
                       call stdlib${ii}$_ctgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ), dummy,1_${ik}$, iwork, ierr )
                    end if
                 end if
              end if
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_ctgsna

     pure module subroutine stdlib${ii}$_ztgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! ZTGSNA estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B).
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(out) :: dif(*), s(*)
           complex(dp), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2
           real(dp) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum
           complex(dp) :: yhax, yhbx
           ! Local Arrays 
           complex(dp) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 do k = 1, n
                    if( select( k ) )m = m + 1_${ik}$
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*n
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           bignum = one / smlnum
           call stdlib${ii}$_dlabad( smlnum, bignum )
           ks = 0_${ik}$
           loop_20: do k = 1, n
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( .not.select( k ) )cycle loop_20
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 rnrm = stdlib${ii}$_dznrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                 lnrm = stdlib${ii}$_dznrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_zgemv( 'N', n, n, cmplx( one, zero,KIND=dp), a, lda,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=dp), work, 1_${ik}$ )
                 yhax = stdlib${ii}$_zdotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_zgemv( 'N', n, n, cmplx( one, zero,KIND=dp), b, ldb,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=dp), work, 1_${ik}$ )
                 yhbx = stdlib${ii}$_zdotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 cond = stdlib${ii}$_dlapy2( abs( yhax ), abs( yhbx ) )
                 if( cond==zero ) then
                    s( ks ) = -one
                 else
                    s( ks ) = cond / ( rnrm*lnrm )
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_dlapy2( abs( a( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 1_${ik}$ ) ) )
                 else
                    ! estimate the reciprocal condition number of the k-th
                    ! eigenvectors.
                    ! copy the matrix (a, b) to the array work and move the
                    ! (k,k)th pair to the (1,1) position.
                    call stdlib${ii}$_zlacpy( 'FULL', n, n, a, lda, work, n )
                    call stdlib${ii}$_zlacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                    ifst = k
                    ilst = 1_${ik}$
                    call stdlib${ii}$_ztgexc( .false., .false., n, work, n, work( n*n+1 ),n, dummy, 1_${ik}$, &
                              dummy1, 1_${ik}$, ifst, ilst, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! ill-conditioned problem - swap rejected.
                       dif( ks ) = zero
                    else
                       ! reordering successful, solve generalized sylvester
                       ! equation for r and l,
                                  ! a22 * r - l * a11 = a12
                                  ! b22 * r - l * b11 = b12,
                       ! and compute estimate of difl[(a11,b11), (a22, b22)].
                       n1 = 1_${ik}$
                       n2 = n - n1
                       i = n*n + 1_${ik}$
                       call stdlib${ii}$_ztgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ), dummy,1_${ik}$, iwork, ierr )
                    end if
                 end if
              end if
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_ztgsna

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgsna( job, howmny, select, n, a, lda, b, ldb, vl,ldvl, vr, ldvr, s, &
     !! ZTGSNA: estimates reciprocal condition numbers for specified
     !! eigenvalues and/or eigenvectors of a matrix pair (A, B).
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
               dif, mm, m, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, job
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: lda, ldb, ldvl, ldvr, lwork, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           integer(${ik}$), intent(out) :: iwork(*)
           real(${ck}$), intent(out) :: dif(*), s(*)
           complex(${ck}$), intent(in) :: a(lda,*), b(ldb,*), vl(ldvl,*), vr(ldvr,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: idifjb = 3_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: lquery, somcon, wantbh, wantdf, wants
           integer(${ik}$) :: i, ierr, ifst, ilst, k, ks, lwmin, n1, n2
           real(${ck}$) :: bignum, cond, eps, lnrm, rnrm, scale, smlnum
           complex(${ck}$) :: yhax, yhbx
           ! Local Arrays 
           complex(${ck}$) :: dummy(1_${ik}$), dummy1(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           wantbh = stdlib_lsame( job, 'B' )
           wants = stdlib_lsame( job, 'E' ) .or. wantbh
           wantdf = stdlib_lsame( job, 'V' ) .or. wantbh
           somcon = stdlib_lsame( howmny, 'S' )
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( .not.wants .and. .not.wantdf ) then
              info = -1_${ik}$
           else if( .not.stdlib_lsame( howmny, 'A' ) .and. .not.somcon ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           else if( wants .and. ldvl<n ) then
              info = -10_${ik}$
           else if( wants .and. ldvr<n ) then
              info = -12_${ik}$
           else
              ! set m to the number of eigenpairs for which condition numbers
              ! are required, and test mm.
              if( somcon ) then
                 m = 0_${ik}$
                 do k = 1, n
                    if( select( k ) )m = m + 1_${ik}$
                 end do
              else
                 m = n
              end if
              if( n==0_${ik}$ ) then
                 lwmin = 1_${ik}$
              else if( stdlib_lsame( job, 'V' ) .or. stdlib_lsame( job, 'B' ) ) then
                 lwmin = 2_${ik}$*n*n
              else
                 lwmin = n
              end if
              work( 1_${ik}$ ) = lwmin
              if( mm<m ) then
                 info = -15_${ik}$
              else if( lwork<lwmin .and. .not.lquery ) then
                 info = -18_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSNA', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n==0 )return
           ! get machine constants
           eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / eps
           bignum = one / smlnum
           call stdlib${ii}$_${c2ri(ci)}$labad( smlnum, bignum )
           ks = 0_${ik}$
           loop_20: do k = 1, n
              ! determine whether condition numbers are required for the k-th
              ! eigenpair.
              if( somcon ) then
                 if( .not.select( k ) )cycle loop_20
              end if
              ks = ks + 1_${ik}$
              if( wants ) then
                 ! compute the reciprocal condition number of the k-th
                 ! eigenvalue.
                 rnrm = stdlib${ii}$_${c2ri(ci)}$znrm2( n, vr( 1_${ik}$, ks ), 1_${ik}$ )
                 lnrm = stdlib${ii}$_${c2ri(ci)}$znrm2( n, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$gemv( 'N', n, n, cmplx( one, zero,KIND=${ck}$), a, lda,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=${ck}$), work, 1_${ik}$ )
                 yhax = stdlib${ii}$_${ci}$dotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 call stdlib${ii}$_${ci}$gemv( 'N', n, n, cmplx( one, zero,KIND=${ck}$), b, ldb,vr( 1_${ik}$, ks ), 1_${ik}$, &
                           cmplx( zero, zero,KIND=${ck}$), work, 1_${ik}$ )
                 yhbx = stdlib${ii}$_${ci}$dotc( n, work, 1_${ik}$, vl( 1_${ik}$, ks ), 1_${ik}$ )
                 cond = stdlib${ii}$_${c2ri(ci)}$lapy2( abs( yhax ), abs( yhbx ) )
                 if( cond==zero ) then
                    s( ks ) = -one
                 else
                    s( ks ) = cond / ( rnrm*lnrm )
                 end if
              end if
              if( wantdf ) then
                 if( n==1_${ik}$ ) then
                    dif( ks ) = stdlib${ii}$_${c2ri(ci)}$lapy2( abs( a( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 1_${ik}$ ) ) )
                 else
                    ! estimate the reciprocal condition number of the k-th
                    ! eigenvectors.
                    ! copy the matrix (a, b) to the array work and move the
                    ! (k,k)th pair to the (1,1) position.
                    call stdlib${ii}$_${ci}$lacpy( 'FULL', n, n, a, lda, work, n )
                    call stdlib${ii}$_${ci}$lacpy( 'FULL', n, n, b, ldb, work( n*n+1 ), n )
                    ifst = k
                    ilst = 1_${ik}$
                    call stdlib${ii}$_${ci}$tgexc( .false., .false., n, work, n, work( n*n+1 ),n, dummy, 1_${ik}$, &
                              dummy1, 1_${ik}$, ifst, ilst, ierr )
                    if( ierr>0_${ik}$ ) then
                       ! ill-conditioned problem - swap rejected.
                       dif( ks ) = zero
                    else
                       ! reordering successful, solve generalized sylvester
                       ! equation for r and l,
                                  ! a22 * r - l * a11 = a12
                                  ! b22 * r - l * b11 = b12,
                       ! and compute estimate of difl[(a11,b11), (a22, b22)].
                       n1 = 1_${ik}$
                       n2 = n - n1
                       i = n*n + 1_${ik}$
                       call stdlib${ii}$_${ci}$tgsyl( 'N', idifjb, n2, n1, work( n*n1+n1+1 ),n, work, n, &
                       work( n1+1 ), n,work( n*n1+n1+i ), n, work( i ), n,work( n1+i ), n, scale, &
                                 dif( ks ), dummy,1_${ik}$, iwork, ierr )
                    end if
                 end if
              end if
           end do loop_20
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_${ci}$tgsna

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! STGSYL solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                 (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with real entries. (A, D) and (B, E) must be in
     !! generalized (real) Schur canonical form, i.e. A, B are upper quasi
     !! triangular and D, E are upper triangular.
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve  Zx = scale b, where
     !! Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]         (2)
     !! [ kron(In, D)  -kron(E**T, Im) ].
     !! Here Ik is the identity matrix of size k and X**T is the transpose of
     !! X. kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'T', STGSYL solves the transposed system Z**T*y = scale*b,
     !! which is equivalent to solve for R and L in
     !! A**T * R + D**T * L = scale * C           (3)
     !! R * B**T + L * E**T = scale * -F
     !! This case (TRANS = 'T') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using SLACON.
     !! If IJOB >= 1, STGSYL computes a Frobenius norm-based estimate
     !! of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z. See [1-2] for more
     !! information.
     !! This is a level 3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(sp), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(sp), intent(inout) :: c(ldc,*), f(ldf,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_scopy by calls to stdlib${ii}$_slaset.
        ! sven hammarling, 1/5/02.
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, ppqq, pq, q
           real(sp) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'STGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'STGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_slaset( 'F', m, n, zero, zero, c, ldc )
                 call stdlib${ii}$_slaset( 'F', m, n, zero, zero, f, ldf )
              else if( ijob>=1_${ik}$ .and. notran ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              loop_30: do iround = 1, isolve
                 ! use unblocked level 2 solver
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 call stdlib${ii}$_stgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,iwork, pq, info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_slacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_slacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_slaset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_slaset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_slacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_slacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           if( a( i, i-1 )/=zero )i = i + 1_${ik}$
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           if( b( j, j-1 )/=zero )j = j + 1_${ik}$
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j)-subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1,..., 1; j = 1, 2,..., q
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 scale = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       ppqq = 0_${ik}$
                       call stdlib${ii}$_stgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + ppqq
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_sscal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_sscal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_sgemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, c( is, &
                                    js ), ldc, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_sgemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, c( is, &
                                    js ), ldc, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_sgemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, b( js, &
                                    je+1 ), ldb,one, c( is, je+1 ), ldc )
                          call stdlib${ii}$_sgemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, e( js, &
                                    je+1 ), lde,one, f( is, je+1 ), ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_slacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_slacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_slaset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_slaset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_slacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_slacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                   ! a(i, i)**t * r(i, j)  + d(i, i)**t * l(i, j)  =  c(i, j)
                   ! r(i, j)  * b(j, j)**t + l(i, j)  * e(j, j)**t = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_stgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,iwork( q+2 ), ppqq, linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_sscal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_sscal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_sscal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                          call stdlib${ii}$_sscal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_sgemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1_${ik}$, js )&
                                 , ldb, one, f( is, 1_${ik}$ ),ldf )
                       call stdlib${ii}$_sgemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1_${ik}$, js )&
                                 , lde, one, f( is, 1_${ik}$ ),ldf )
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_sgemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( is, &
                                 js ), ldc, one,c( ie+1, js ), ldc )
                       call stdlib${ii}$_sgemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( is, &
                                 js ), ldf, one,c( ie+1, js ), ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_stgsyl

     pure module subroutine stdlib${ii}$_dtgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! DTGSYL solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                 (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with real entries. (A, D) and (B, E) must be in
     !! generalized (real) Schur canonical form, i.e. A, B are upper quasi
     !! triangular and D, E are upper triangular.
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve  Zx = scale b, where
     !! Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]         (2)
     !! [ kron(In, D)  -kron(E**T, Im) ].
     !! Here Ik is the identity matrix of size k and X**T is the transpose of
     !! X. kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'T', DTGSYL solves the transposed system Z**T*y = scale*b,
     !! which is equivalent to solve for R and L in
     !! A**T * R + D**T * L = scale * C           (3)
     !! R * B**T + L * E**T = scale * -F
     !! This case (TRANS = 'T') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using DLACON.
     !! If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate
     !! of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z. See [1-2] for more
     !! information.
     !! This is a level 3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(dp), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(dp), intent(inout) :: c(ldc,*), f(ldf,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_dcopy by calls to stdlib${ii}$_dlaset.
        ! sven hammarling, 1/5/02.
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, ppqq, pq, q
           real(dp) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'DTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'DTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, c, ldc )
                 call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, f, ldf )
              else if( ijob>=1_${ik}$ ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              loop_30: do iround = 1, isolve
                 ! use unblocked level 2 solver
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 call stdlib${ii}$_dtgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,iwork, pq, info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_dlacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_dlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_dlacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_dlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           if( a( i, i-1 )/=zero )i = i + 1_${ik}$
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           if( b( j, j-1 )/=zero )j = j + 1_${ik}$
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j)-subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1,..., 1; j = 1, 2,..., q
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 scale = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       ppqq = 0_${ik}$
                       call stdlib${ii}$_dtgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + ppqq
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_dscal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_dscal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_dgemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, c( is, &
                                    js ), ldc, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_dgemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, c( is, &
                                    js ), ldc, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_dgemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, b( js, &
                                    je+1 ), ldb,one, c( is, je+1 ), ldc )
                          call stdlib${ii}$_dgemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, e( js, &
                                    je+1 ), lde,one, f( is, je+1 ), ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_dlacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_dlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_dlaset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_dlacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_dlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                   ! a(i, i)**t * r(i, j)  + d(i, i)**t * l(i, j)  =  c(i, j)
                   ! r(i, j)  * b(j, j)**t + l(i, j)  * e(j, j)**t = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_dtgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,iwork( q+2 ), ppqq, linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_dscal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_dscal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_dscal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                          call stdlib${ii}$_dscal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_dgemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1_${ik}$, js )&
                                 , ldb, one, f( is, 1_${ik}$ ),ldf )
                       call stdlib${ii}$_dgemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1_${ik}$, js )&
                                 , lde, one, f( is, 1_${ik}$ ),ldf )
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_dgemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( is, &
                                 js ), ldc, one,c( ie+1, js ), ldc )
                       call stdlib${ii}$_dgemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( is, &
                                 js ), ldf, one,c( ie+1, js ), ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_dtgsyl

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! DTGSYL: solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                 (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with real entries. (A, D) and (B, E) must be in
     !! generalized (real) Schur canonical form, i.e. A, B are upper quasi
     !! triangular and D, E are upper triangular.
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve  Zx = scale b, where
     !! Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]         (2)
     !! [ kron(In, D)  -kron(E**T, Im) ].
     !! Here Ik is the identity matrix of size k and X**T is the transpose of
     !! X. kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'T', DTGSYL solves the transposed system Z**T*y = scale*b,
     !! which is equivalent to solve for R and L in
     !! A**T * R + D**T * L = scale * C           (3)
     !! R * B**T + L * E**T = scale * -F
     !! This case (TRANS = 'T') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using DLACON.
     !! If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate
     !! of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z. See [1-2] for more
     !! information.
     !! This is a level 3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(${rk}$), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(${rk}$), intent(inout) :: c(ldc,*), f(ldf,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_${ri}$copy by calls to stdlib${ii}$_${ri}$laset.
        ! sven hammarling, 1/5/02.
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, ppqq, pq, q
           real(${rk}$) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'DTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'DTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, c, ldc )
                 call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, f, ldf )
              else if( ijob>=1_${ik}$ ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              loop_30: do iround = 1, isolve
                 ! use unblocked level 2 solver
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 call stdlib${ii}$_${ri}$tgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,iwork, pq, info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=${rk}$) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=${rk}$) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           if( a( i, i-1 )/=zero )i = i + 1_${ik}$
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           if( b( j, j-1 )/=zero )j = j + 1_${ik}$
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j)-subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1,..., 1; j = 1, 2,..., q
                 dscale = zero
                 dsum = one
                 pq = 0_${ik}$
                 scale = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       ppqq = 0_${ik}$
                       call stdlib${ii}$_${ri}$tgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,iwork( q+2 ), ppqq, linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + ppqq
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_${ri}$scal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          do k = js, je
                             call stdlib${ii}$_${ri}$scal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, c( is, &
                                    js ), ldc, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, c( is, &
                                    js ), ldc, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, b( js, &
                                    je+1 ), ldb,one, c( is, je+1 ), ldc )
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', mb, n-je, nb, one,f( is, js ), ldf, e( js, &
                                    je+1 ), lde,one, f( is, je+1 ), ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=${rk}$) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=${rk}$) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, c, ldc )
                    call stdlib${ii}$_${ri}$laset( 'F', m, n, zero, zero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_${ri}$lacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                   ! a(i, i)**t * r(i, j)  + d(i, i)**t * l(i, j)  =  c(i, j)
                   ! r(i, j)  * b(j, j)**t + l(i, j)  * e(j, j)**t = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_${ri}$tgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,iwork( q+2 ), ppqq, linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_${ri}$scal( is-1, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$scal( is-1, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       do k = js, je
                          call stdlib${ii}$_${ri}$scal( m-ie, scaloc, c( ie+1, k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$scal( m-ie, scaloc, f( ie+1, k ), 1_${ik}$ )
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_${ri}$gemm( 'N', 'T', mb, js-1, nb, one, c( is, js ),ldc, b( 1_${ik}$, js )&
                                 , ldb, one, f( is, 1_${ik}$ ),ldf )
                       call stdlib${ii}$_${ri}$gemm( 'N', 'T', mb, js-1, nb, one, f( is, js ),ldf, e( 1_${ik}$, js )&
                                 , lde, one, f( is, 1_${ik}$ ),ldf )
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( is, &
                                 js ), ldc, one,c( ie+1, js ), ldc )
                       call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( is, &
                                 js ), ldf, one,c( ie+1, js ), ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_${ri}$tgsyl

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! CTGSYL solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C            (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with complex entries. A, B, D and E are upper
     !! triangular (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1
     !! is an output scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve Zx = scale*b, where Z
     !! is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]        (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Here Ix is the identity matrix of size x and X**H is the conjugate
     !! transpose of X. Kron(X, Y) is the Kronecker product between the
     !! matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R + D**H * L = scale * C           (3)
     !! R * B**H + L * E**H = scale * -F
     !! This case (TRANS = 'C') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using CLACON.
     !! If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of
     !! Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z.
     !! This is a level-3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(sp), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           complex(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(sp), intent(inout) :: c(ldc,*), f(ldf,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_ccopy by calls to stdlib${ii}$_claset.
        ! sven hammarling, 1/5/02.
           
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, pq, q
           real(sp) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine  optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'CTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'CTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_claset( 'F', m, n, czero, czero, c, ldc )
                 call stdlib${ii}$_claset( 'F', m, n, czero, czero, f, ldf )
              else if( ijob>=1_${ik}$ .and. notran ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              ! use unblocked level 2 solver
              loop_30: do iround = 1, isolve
                 scale = one
                 dscale = zero
                 dsum = one
                 pq = m*n
                 call stdlib${ii}$_ctgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_clacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_clacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_claset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_claset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_clacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_clacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j) - subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
                 pq = 0_${ik}$
                 scale = one
                 dscale = zero
                 dsum = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       call stdlib${ii}$_ctgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + mb*nb
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                       
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_cscal( is-1, cmplx( scaloc, zero,KIND=sp),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_cscal( is-1, cmplx( scaloc, zero,KIND=sp),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),c( ie+1, k ), &
                                       1_${ik}$ )
                             call stdlib${ii}$_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),f( ie+1, k ), &
                                       1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                       
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i,j) and l(i,j) into remaining equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_cgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=sp), a(&
                           1_${ik}$, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=sp),c( 1_${ik}$, js ), &
                                     ldc )
                          call stdlib${ii}$_cgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=sp), d(&
                           1_${ik}$, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=sp),f( 1_${ik}$, js ), &
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_cgemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=sp), f( &
                          is, js ), ldf,b( js, je+1 ), ldb, cmplx( one, zero,KIND=sp),c( is, je+1 &
                                    ), ldc )
                          call stdlib${ii}$_cgemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=sp), f( &
                          is, js ), ldf,e( js, je+1 ), lde, cmplx( one, zero,KIND=sp),f( is, je+1 &
                                    ), ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=sp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_clacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_clacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_claset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_claset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_clacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_clacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                  ! a(i, i)**h * r(i, j) + d(i, i)**h * l(i, j) = c(i, j)
                  ! r(i, j) * b(j, j)  + l(i, j) * e(j, j) = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_ctgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_cscal( is-1, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_cscal( is-1, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),c( ie+1, k ), 1_${ik}$ )
                                    
                          call stdlib${ii}$_cscal( m-ie, cmplx( scaloc, zero,KIND=sp),f( ie+1, k ), 1_${ik}$ )
                                    
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i,j) and l(i,j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_cgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=sp), c( is,&
                        js ), ldc,b( 1_${ik}$, js ), ldb, cmplx( one, zero,KIND=sp),f( is, 1_${ik}$ ), ldf )
                                  
                       call stdlib${ii}$_cgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=sp), f( is,&
                        js ), ldf,e( 1_${ik}$, js ), lde, cmplx( one, zero,KIND=sp),f( is, 1_${ik}$ ), ldf )
                                  
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_cgemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=sp), a( &
                       is, ie+1 ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=sp),c( ie+1, js ), &
                                 ldc )
                       call stdlib${ii}$_cgemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=sp), d( &
                       is, ie+1 ), ldd,f( is, js ), ldf, cmplx( one, zero,KIND=sp),c( ie+1, js ), &
                                 ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_ctgsyl

     pure module subroutine stdlib${ii}$_ztgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! ZTGSYL solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C            (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with complex entries. A, B, D and E are upper
     !! triangular (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1
     !! is an output scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve Zx = scale*b, where Z
     !! is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]        (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Here Ix is the identity matrix of size x and X**H is the conjugate
     !! transpose of X. Kron(X, Y) is the Kronecker product between the
     !! matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R + D**H * L = scale * C           (3)
     !! R * B**H + L * E**H = scale * -F
     !! This case (TRANS = 'C') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using ZLACON.
     !! If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of
     !! Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z.
     !! This is a level-3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(dp), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           complex(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(dp), intent(inout) :: c(ldc,*), f(ldf,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_ccopy by calls to stdlib${ii}$_claset.
        ! sven hammarling, 1/5/02.
           
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, pq, q
           real(dp) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine  optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'ZTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, c, ldc )
                 call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, f, ldf )
              else if( ijob>=1_${ik}$ .and. notran ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              ! use unblocked level 2 solver
              loop_30: do iround = 1, isolve
                 scale = one
                 dscale = zero
                 dsum = one
                 pq = m*n
                 call stdlib${ii}$_ztgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_zlacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_zlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_zlacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_zlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j) - subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
                 pq = 0_${ik}$
                 scale = one
                 dscale = zero
                 dsum = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       call stdlib${ii}$_ztgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + mb*nb
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_zscal( is-1, cmplx( scaloc, zero,KIND=dp),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_zscal( is-1, cmplx( scaloc, zero,KIND=dp),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),c( ie+1, k ), &
                                       1_${ik}$ )
                             call stdlib${ii}$_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),f( ie+1, k ), &
                                       1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i,j) and l(i,j) into remaining equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_zgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=dp), a(&
                           1_${ik}$, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=dp),c( 1_${ik}$, js ), &
                                     ldc )
                          call stdlib${ii}$_zgemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=dp), d(&
                           1_${ik}$, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=dp),f( 1_${ik}$, js ), &
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_zgemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=dp), f( &
                          is, js ), ldf,b( js, je+1 ), ldb,cmplx( one, zero,KIND=dp), c( is, je+1 &
                                    ),ldc )
                          call stdlib${ii}$_zgemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=dp), f( &
                          is, js ), ldf,e( js, je+1 ), lde,cmplx( one, zero,KIND=dp), f( is, je+1 &
                                    ),ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=dp) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_zlacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_zlacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_zlaset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_zlacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_zlacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                  ! a(i, i)**h * r(i, j) + d(i, i)**h * l(i, j) = c(i, j)
                  ! r(i, j) * b(j, j)  + l(i, j) * e(j, j) = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_ztgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_zscal( is-1, cmplx( scaloc, zero,KIND=dp),c( 1_${ik}$, k ), 1_${ik}$ )
                                    
                          call stdlib${ii}$_zscal( is-1, cmplx( scaloc, zero,KIND=dp),f( 1_${ik}$, k ), 1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),c( ie+1, k ), 1_${ik}$ )
                                    
                          call stdlib${ii}$_zscal( m-ie, cmplx( scaloc, zero,KIND=dp),f( ie+1, k ), 1_${ik}$ )
                                    
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i,j) and l(i,j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_zgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=dp), c( is,&
                        js ), ldc,b( 1_${ik}$, js ), ldb, cmplx( one, zero,KIND=dp),f( is, 1_${ik}$ ), ldf )
                                  
                       call stdlib${ii}$_zgemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=dp), f( is,&
                        js ), ldf,e( 1_${ik}$, js ), lde, cmplx( one, zero,KIND=dp),f( is, 1_${ik}$ ), ldf )
                                  
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_zgemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=dp), a( &
                       is, ie+1 ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=dp),c( ie+1, js ), &
                                 ldc )
                       call stdlib${ii}$_zgemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=dp), d( &
                       is, ie+1 ), ldd,f( is, js ), ldf, cmplx( one, zero,KIND=dp),c( ie+1, js ), &
                                 ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_ztgsyl

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgsyl( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! ZTGSYL: solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C            (1)
     !! D * R - L * E = scale * F
     !! where R and L are unknown m-by-n matrices, (A, D), (B, E) and
     !! (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
     !! respectively, with complex entries. A, B, D and E are upper
     !! triangular (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1
     !! is an output scaling factor chosen to avoid overflow.
     !! In matrix notation (1) is equivalent to solve Zx = scale*b, where Z
     !! is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]        (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Here Ix is the identity matrix of size x and X**H is the conjugate
     !! transpose of X. Kron(X, Y) is the Kronecker product between the
     !! matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R + D**H * L = scale * C           (3)
     !! R * B**H + L * E**H = scale * -F
     !! This case (TRANS = 'C') is used to compute an one-norm-based estimate
     !! of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
     !! and (B,E), using ZLACON.
     !! If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of
     !! Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
     !! reciprocal of the smallest singular value of Z.
     !! This is a level-3 BLAS algorithm.
               ldf, scale, dif, work, lwork,iwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, lwork, m, n
           integer(${ik}$), intent(out) :: info
           real(${ck}$), intent(out) :: dif, scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           complex(${ck}$), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(${ck}$), intent(inout) :: c(ldc,*), f(ldf,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_zcopy by calls to stdlib${ii}$_zlaset.
        ! sven hammarling, 1/5/02.
           
           
           ! Local Scalars 
           logical(lk) :: lquery, notran
           integer(${ik}$) :: i, ie, ifunc, iround, is, isolve, j, je, js, k, linfo, lwmin, mb, nb, &
                     p, pq, q
           real(${ck}$) :: dscale, dsum, scale2, scaloc
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>4_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( notran ) then
                 if( ijob==1_${ik}$ .or. ijob==2_${ik}$ ) then
                    lwmin = max( 1_${ik}$, 2_${ik}$*m*n )
                 else
                    lwmin = 1_${ik}$
                 end if
              else
                 lwmin = 1_${ik}$
              end if
              work( 1_${ik}$ ) = lwmin
              if( lwork<lwmin .and. .not.lquery ) then
                 info = -20_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSYL', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              scale = 1_${ik}$
              if( notran ) then
                 if( ijob/=0_${ik}$ ) then
                    dif = 0_${ik}$
                 end if
              end if
              return
           end if
           ! determine  optimal block sizes mb and nb
           mb = stdlib${ii}$_ilaenv( 2_${ik}$, 'ZTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           nb = stdlib${ii}$_ilaenv( 5_${ik}$, 'ZTGSYL', trans, m, n, -1_${ik}$, -1_${ik}$ )
           isolve = 1_${ik}$
           ifunc = 0_${ik}$
           if( notran ) then
              if( ijob>=3_${ik}$ ) then
                 ifunc = ijob - 2_${ik}$
                 call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, c, ldc )
                 call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, f, ldf )
              else if( ijob>=1_${ik}$ .and. notran ) then
                 isolve = 2_${ik}$
              end if
           end if
           if( ( mb<=1_${ik}$ .and. nb<=1_${ik}$ ) .or. ( mb>=m .and. nb>=n ) )then
              ! use unblocked level 2 solver
              loop_30: do iround = 1, isolve
                 scale = one
                 dscale = zero
                 dsum = one
                 pq = m*n
                 call stdlib${ii}$_${ci}$tgsy2( trans, ifunc, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f,&
                            ldf, scale, dsum, dscale,info )
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=${ck}$) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=${ck}$) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_30
              return
           end if
           ! determine block structure of a
           p = 0_${ik}$
           i = 1_${ik}$
           40 continue
           if( i>m )go to 50
           p = p + 1_${ik}$
           iwork( p ) = i
           i = i + mb
           if( i>=m )go to 50
           go to 40
           50 continue
           iwork( p+1 ) = m + 1_${ik}$
           if( iwork( p )==iwork( p+1 ) )p = p - 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           60 continue
           if( j>n )go to 70
           q = q + 1_${ik}$
           iwork( q ) = j
           j = j + nb
           if( j>=n )go to 70
           go to 60
           70 continue
           iwork( q+1 ) = n + 1_${ik}$
           if( iwork( q )==iwork( q+1 ) )q = q - 1_${ik}$
           if( notran ) then
              loop_150: do iround = 1, isolve
                 ! solve (i, j) - subsystem
                     ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                     ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
                 ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
                 pq = 0_${ik}$
                 scale = one
                 dscale = zero
                 dsum = one
                 loop_130: do j = p + 2, q
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    loop_120: do i = p, 1, -1
                       is = iwork( i )
                       ie = iwork( i+1 ) - 1_${ik}$
                       mb = ie - is + 1_${ik}$
                       call stdlib${ii}$_${ci}$tgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), &
                       ldb, c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, &
                                 scaloc, dsum, dscale,linfo )
                       if( linfo>0_${ik}$ )info = linfo
                       pq = pq + mb*nb
                       if( scaloc/=one ) then
                          do k = 1, js - 1
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_${ci}$scal( is-1, cmplx( scaloc, zero,KIND=${ck}$),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_${ci}$scal( is-1, cmplx( scaloc, zero,KIND=${ck}$),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          do k = js, je
                             call stdlib${ii}$_${ci}$scal( m-ie, cmplx( scaloc, zero,KIND=${ck}$),c( ie+1, k ), &
                                       1_${ik}$ )
                             call stdlib${ii}$_${ci}$scal( m-ie, cmplx( scaloc, zero,KIND=${ck}$),f( ie+1, k ), &
                                       1_${ik}$ )
                          end do
                          do k = je + 1, n
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                       ! substitute r(i,j) and l(i,j) into remaining equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_${ci}$gemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=${ck}$), a(&
                           1_${ik}$, is ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=${ck}$),c( 1_${ik}$, js ), &
                                     ldc )
                          call stdlib${ii}$_${ci}$gemm( 'N', 'N', is-1, nb, mb,cmplx( -one, zero,KIND=${ck}$), d(&
                           1_${ik}$, is ), ldd,c( is, js ), ldc, cmplx( one, zero,KIND=${ck}$),f( 1_${ik}$, js ), &
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_${ci}$gemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=${ck}$), f( &
                          is, js ), ldf,b( js, je+1 ), ldb,cmplx( one, zero,KIND=${ck}$), c( is, je+1 &
                                    ),ldc )
                          call stdlib${ii}$_${ci}$gemm( 'N', 'N', mb, n-je, nb,cmplx( one, zero,KIND=${ck}$), f( &
                          is, js ), ldf,e( js, je+1 ), lde,cmplx( one, zero,KIND=${ck}$), f( is, je+1 &
                                    ),ldf )
                       end if
                    end do loop_120
                 end do loop_130
                 if( dscale/=zero ) then
                    if( ijob==1_${ik}$ .or. ijob==3_${ik}$ ) then
                       dif = sqrt( real( 2_${ik}$*m*n,KIND=${ck}$) ) / ( dscale*sqrt( dsum ) )
                    else
                       dif = sqrt( real( pq,KIND=${ck}$) ) / ( dscale*sqrt( dsum ) )
                    end if
                 end if
                 if( isolve==2_${ik}$ .and. iround==1_${ik}$ ) then
                    if( notran ) then
                       ifunc = ijob
                    end if
                    scale2 = scale
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, c, ldc, work, m )
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, f, ldf, work( m*n+1 ), m )
                    call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, c, ldc )
                    call stdlib${ii}$_${ci}$laset( 'F', m, n, czero, czero, f, ldf )
                 else if( isolve==2_${ik}$ .and. iround==2_${ik}$ ) then
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work, m, c, ldc )
                    call stdlib${ii}$_${ci}$lacpy( 'F', m, n, work( m*n+1 ), m, f, ldf )
                    scale = scale2
                 end if
              end do loop_150
           else
              ! solve transposed (i, j)-subsystem
                  ! a(i, i)**h * r(i, j) + d(i, i)**h * l(i, j) = c(i, j)
                  ! r(i, j) * b(j, j)  + l(i, j) * e(j, j) = -f(i, j)
              ! for i = 1,2,..., p; j = q, q-1,..., 1
              scale = one
              loop_210: do i = 1, p
                 is = iwork( i )
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_200: do j = q, p + 2, -1
                    js = iwork( j )
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    call stdlib${ii}$_${ci}$tgsy2( trans, ifunc, mb, nb, a( is, is ), lda,b( js, js ), ldb, &
                    c( is, js ), ldc,d( is, is ), ldd, e( js, js ), lde,f( is, js ), ldf, scaloc, &
                              dsum, dscale,linfo )
                    if( linfo>0_${ik}$ )info = linfo
                    if( scaloc/=one ) then
                       do k = 1, js - 1
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_${ci}$scal( is-1, cmplx( scaloc, zero,KIND=${ck}$),c( 1_${ik}$, k ), 1_${ik}$ )
                                    
                          call stdlib${ii}$_${ci}$scal( is-1, cmplx( scaloc, zero,KIND=${ck}$),f( 1_${ik}$, k ), 1_${ik}$ )
                                    
                       end do
                       do k = js, je
                          call stdlib${ii}$_${ci}$scal( m-ie, cmplx( scaloc, zero,KIND=${ck}$),c( ie+1, k ), 1_${ik}$ )
                                    
                          call stdlib${ii}$_${ci}$scal( m-ie, cmplx( scaloc, zero,KIND=${ck}$),f( ie+1, k ), 1_${ik}$ )
                                    
                       end do
                       do k = je + 1, n
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! substitute r(i,j) and l(i,j) into remaining equation.
                    if( j>p+2 ) then
                       call stdlib${ii}$_${ci}$gemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=${ck}$), c( is,&
                        js ), ldc,b( 1_${ik}$, js ), ldb, cmplx( one, zero,KIND=${ck}$),f( is, 1_${ik}$ ), ldf )
                                  
                       call stdlib${ii}$_${ci}$gemm( 'N', 'C', mb, js-1, nb,cmplx( one, zero,KIND=${ck}$), f( is,&
                        js ), ldf,e( 1_${ik}$, js ), lde, cmplx( one, zero,KIND=${ck}$),f( is, 1_${ik}$ ), ldf )
                                  
                    end if
                    if( i<p ) then
                       call stdlib${ii}$_${ci}$gemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=${ck}$), a( &
                       is, ie+1 ), lda,c( is, js ), ldc, cmplx( one, zero,KIND=${ck}$),c( ie+1, js ), &
                                 ldc )
                       call stdlib${ii}$_${ci}$gemm( 'C', 'N', m-ie, nb, mb,cmplx( -one, zero,KIND=${ck}$), d( &
                       is, ie+1 ), ldd,f( is, js ), ldf, cmplx( one, zero,KIND=${ck}$),c( ie+1, js ), &
                                 ldc )
                    end if
                 end do loop_200
              end do loop_210
           end if
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_${ci}$tgsyl

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! STGSY2 solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                (1)
     !! D * R - L * E = scale * F,
     !! using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E)
     !! must be in generalized Schur canonical form, i.e. A, B are upper
     !! quasi triangular and D, E are upper triangular. The solution (R, L)
     !! overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor
     !! chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Z*x = scale*b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**T, Im) ],
     !! Ik is the identity matrix of size k and X**T is the transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! In the process of solving (1), we solve a number of such systems
     !! where Dim(In), Dim(In) = 1 or 2.
     !! If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y,
     !! which is equivalent to solve for R and L in
     !! A**T * R  + D**T * L   = scale * C           (3)
     !! R  * B**T + L  * E**T  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! sigma_min(Z) using reverse communication with SLACON.
     !! STGSY2 also (IJOB >= 1) contributes to the computation in STGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of the matrix pair in
     !! STGSYL. See STGSYL for details.
               ldf, scale, rdsum, rdscal,iwork, pq, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info, pq
           real(sp), intent(inout) :: rdscal, rdsum
           real(sp), intent(out) :: scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(sp), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_scopy by calls to stdlib${ii}$_slaset.
        ! sven hammarling, 27/5/02.
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 8_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ie, ierr, ii, is, isp1, j, je, jj, js, jsp1, k, mb, nb, p, q, &
                     zdim
           real(sp) :: alpha, scaloc
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           real(sp) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGSY2', -info )
              return
           end if
           ! determine block structure of a
           pq = 0_${ik}$
           p = 0_${ik}$
           i = 1_${ik}$
           10 continue
           if( i>m )go to 20
           p = p + 1_${ik}$
           iwork( p ) = i
           if( i==m )go to 20
           if( a( i+1, i )/=zero ) then
              i = i + 2_${ik}$
           else
              i = i + 1_${ik}$
           end if
           go to 10
           20 continue
           iwork( p+1 ) = m + 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           30 continue
           if( j>n )go to 40
           q = q + 1_${ik}$
           iwork( q ) = j
           if( j==n )go to 40
           if( b( j+1, j )/=zero ) then
              j = j + 2_${ik}$
           else
              j = j + 1_${ik}$
           end if
           go to 30
           40 continue
           iwork( q+1 ) = n + 1_${ik}$
           pq = p*( q-p-1 )
           if( notran ) then
              ! solve (i, j) - subsystem
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
              scale = one
              scaloc = one
              loop_120: do j = p + 2, q
                 js = iwork( j )
                 jsp1 = js + 1_${ik}$
                 je = iwork( j+1 ) - 1_${ik}$
                 nb = je - js + 1_${ik}$
                 loop_110: do i = p, 1, -1
                    is = iwork( i )
                    isp1 = is + 1_${ik}$
                    ie = iwork( i+1 ) - 1_${ik}$
                    mb = ie - is + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_slatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_saxpy( is-1, alpha, a( 1_${ik}$, is ), 1_${ik}$, c( 1_${ik}$, js ),1_${ik}$ )
                          call stdlib${ii}$_saxpy( is-1, alpha, d( 1_${ik}$, is ), 1_${ik}$, f( 1_${ik}$, js ),1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_saxpy( n-je, rhs( 2_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_saxpy( n-je, rhs( 2_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 4_${ik}$ ) = -b( jsp1, js )
                       z( 2_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_slatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_sger( is-1, nb, -one, a( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, c( 1_${ik}$, js ),&
                                     ldc )
                          call stdlib${ii}$_sger( is-1, nb, -one, d( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, f( 1_${ik}$, js ),&
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_saxpy( n-je, rhs( 3_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_saxpy( n-je, rhs( 3_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                          call stdlib${ii}$_saxpy( n-je, rhs( 4_${ik}$ ), b( jsp1, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_saxpy( n-je, rhs( 4_${ik}$ ), e( jsp1, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_slatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_sgemv( 'N', is-1, mb, -one, a( 1_${ik}$, is ), lda,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( 1_${ik}$, js ), 1_${ik}$ )
                          call stdlib${ii}$_sgemv( 'N', is-1, mb, -one, d( 1_${ik}$, is ), ldd,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, f( 1_${ik}$, js ), 1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_sger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,b( js, je+1 ), ldb, c( is, &
                                    je+1 ), ldc )
                          call stdlib${ii}$_sger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,e( js, je+1 ), lde, f( is, &
                                    je+1 ), ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z * x = rhs
                       call stdlib${ii}$_slaset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 5_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 5_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( isp1, is )
                       z( 7_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = a( is, isp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 7_${ik}$, 4_${ik}$ ) = d( is, isp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 5_${ik}$ ) = -b( js, jsp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 7_${ik}$, 5_${ik}$ ) = -e( js, jsp1 )
                       z( 2_${ik}$, 6_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 6_${ik}$ ) = -b( js, jsp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 8_${ik}$, 6_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 7_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 7_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 2_${ik}$, 8_${ik}$ ) = -b( jsp1, js )
                       z( 4_${ik}$, 8_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_scopy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_scopy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_slatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_scopy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_scopy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_sgemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, rhs( 1_${ik}$ &
                                    ), mb, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_sgemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, rhs( 1_${ik}$ &
                                    ), mb, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          k = mb*nb + 1_${ik}$
                          call stdlib${ii}$_sgemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, b( js, je+&
                                    1_${ik}$ ), ldb, one,c( is, je+1 ), ldc )
                          call stdlib${ii}$_sgemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, e( js, je+&
                                    1_${ik}$ ), lde, one,f( is, je+1 ), ldf )
                       end if
                    end if
                 end do loop_110
              end do loop_120
           else
              ! solve (i, j) - subsystem
                   ! a(i, i)**t * r(i, j) + d(i, i)**t * l(j, j)  =  c(i, j)
                   ! r(i, i)  * b(j, j) + l(i, j)  * e(j, j)  = -f(i, j)
              ! for i = 1, 2, ..., p, j = q, q - 1, ..., 1
              scale = one
              scaloc = one
              loop_200: do i = 1, p
                 is = iwork( i )
                 isp1 = is + 1_${ik}$
                 ie = iwork( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_190: do j = q, p + 2, -1
                    js = iwork( j )
                    jsp1 = js + 1_${ik}$
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          alpha = rhs( 1_${ik}$ )
                          call stdlib${ii}$_saxpy( js-1, alpha, b( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                          alpha = rhs( 2_${ik}$ )
                          call stdlib${ii}$_saxpy( js-1, alpha, e( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                       end if
                       if( i<p ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_saxpy( m-ie, alpha, a( is, ie+1 ), lda,c( ie+1, js ), 1_${ik}$ )
                                    
                          alpha = -rhs( 2_${ik}$ )
                          call stdlib${ii}$_saxpy( m-ie, alpha, d( is, ie+1 ), ldd,c( ie+1, js ), 1_${ik}$ )
                                    
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = -b( js, jsp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = -e( js, jsp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_saxpy( js-1, rhs( 1_${ik}$ ), b( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_saxpy( js-1, rhs( 2_${ik}$ ), b( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_saxpy( js-1, rhs( 3_${ik}$ ), e( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_saxpy( js-1, rhs( 4_${ik}$ ), e( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_sger( m-ie, nb, -one, a( is, ie+1 ), lda,rhs( 1_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                          call stdlib${ii}$_sger( m-ie, nb, -one, d( is, ie+1 ), ldd,rhs( 3_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = d( is, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_sger( mb, js-1, one, rhs( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                          call stdlib${ii}$_sger( mb, js-1, one, rhs( 3_${ik}$ ), 1_${ik}$, e( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_sgemv( 'T', mb, m-ie, -one, a( is, ie+1 ),lda, rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                          call stdlib${ii}$_sgemv( 'T', mb, m-ie, -one, d( is, ie+1 ),ldd, rhs( 3_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z**t * x = rhs
                       call stdlib${ii}$_slaset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 7_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 8_${ik}$, 2_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 7_${ik}$, 3_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = a( isp1, is )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 4_${ik}$ ) = -b( js, jsp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 5_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 2_${ik}$, 6_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 3_${ik}$, 7_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 7_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 7_${ik}$ ) = -e( js, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 4_${ik}$, 8_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 8_${ik}$ ) = -e( js, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_scopy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_scopy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_sgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_sgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_sscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_sscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_scopy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_scopy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_sgemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1_${ik}$, &
                                    js ), ldb, one,f( is, 1_${ik}$ ), ldf )
                          call stdlib${ii}$_sgemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1_${ik}$, &
                                    js ), lde, one,f( is, 1_${ik}$ ), ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_sgemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( &
                                    is, js ), ldc,one, c( ie+1, js ), ldc )
                          call stdlib${ii}$_sgemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( &
                                    is, js ), ldf,one, c( ie+1, js ), ldc )
                       end if
                    end if
                 end do loop_190
              end do loop_200
           end if
           return
     end subroutine stdlib${ii}$_stgsy2

     pure module subroutine stdlib${ii}$_dtgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! DTGSY2 solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                (1)
     !! D * R - L * E = scale * F,
     !! using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E)
     !! must be in generalized Schur canonical form, i.e. A, B are upper
     !! quasi triangular and D, E are upper triangular. The solution (R, L)
     !! overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor
     !! chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Z*x = scale*b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**T, Im) ],
     !! Ik is the identity matrix of size k and X**T is the transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! In the process of solving (1), we solve a number of such systems
     !! where Dim(In), Dim(In) = 1 or 2.
     !! If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y,
     !! which is equivalent to solve for R and L in
     !! A**T * R  + D**T * L   = scale * C           (3)
     !! R  * B**T + L  * E**T  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! sigma_min(Z) using reverse communication with DLACON.
     !! DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of the matrix pair in
     !! DTGSYL. See DTGSYL for details.
               ldf, scale, rdsum, rdscal,iwork, pq, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info, pq
           real(dp), intent(inout) :: rdscal, rdsum
           real(dp), intent(out) :: scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(dp), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_dcopy by calls to stdlib${ii}$_dlaset.
        ! sven hammarling, 27/5/02.
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 8_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ie, ierr, ii, is, isp1, j, je, jj, js, jsp1, k, mb, nb, p, q, &
                     zdim
           real(dp) :: alpha, scaloc
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           real(dp) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSY2', -info )
              return
           end if
           ! determine block structure of a
           pq = 0_${ik}$
           p = 0_${ik}$
           i = 1_${ik}$
           10 continue
           if( i>m )go to 20
           p = p + 1_${ik}$
           iwork( p ) = i
           if( i==m )go to 20
           if( a( i+1, i )/=zero ) then
              i = i + 2_${ik}$
           else
              i = i + 1_${ik}$
           end if
           go to 10
           20 continue
           iwork( p+1 ) = m + 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           30 continue
           if( j>n )go to 40
           q = q + 1_${ik}$
           iwork( q ) = j
           if( j==n )go to 40
           if( b( j+1, j )/=zero ) then
              j = j + 2_${ik}$
           else
              j = j + 1_${ik}$
           end if
           go to 30
           40 continue
           iwork( q+1 ) = n + 1_${ik}$
           pq = p*( q-p-1 )
           if( notran ) then
              ! solve (i, j) - subsystem
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
              scale = one
              scaloc = one
              loop_120: do j = p + 2, q
                 js = iwork( j )
                 jsp1 = js + 1_${ik}$
                 je = iwork( j+1 ) - 1_${ik}$
                 nb = je - js + 1_${ik}$
                 loop_110: do i = p, 1, -1
                    is = iwork( i )
                    isp1 = is + 1_${ik}$
                    ie = iwork( i+1 ) - 1_${ik}$
                    mb = ie - is + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_dlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_daxpy( is-1, alpha, a( 1_${ik}$, is ), 1_${ik}$, c( 1_${ik}$, js ),1_${ik}$ )
                          call stdlib${ii}$_daxpy( is-1, alpha, d( 1_${ik}$, is ), 1_${ik}$, f( 1_${ik}$, js ),1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_daxpy( n-je, rhs( 2_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_daxpy( n-je, rhs( 2_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 4_${ik}$ ) = -b( jsp1, js )
                       z( 2_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_dlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_dger( is-1, nb, -one, a( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, c( 1_${ik}$, js ),&
                                     ldc )
                          call stdlib${ii}$_dger( is-1, nb, -one, d( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, f( 1_${ik}$, js ),&
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_daxpy( n-je, rhs( 3_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_daxpy( n-je, rhs( 3_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                          call stdlib${ii}$_daxpy( n-je, rhs( 4_${ik}$ ), b( jsp1, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_daxpy( n-je, rhs( 4_${ik}$ ), e( jsp1, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_dlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_dgemv( 'N', is-1, mb, -one, a( 1_${ik}$, is ), lda,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( 1_${ik}$, js ), 1_${ik}$ )
                          call stdlib${ii}$_dgemv( 'N', is-1, mb, -one, d( 1_${ik}$, is ), ldd,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, f( 1_${ik}$, js ), 1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_dger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,b( js, je+1 ), ldb, c( is, &
                                    je+1 ), ldc )
                          call stdlib${ii}$_dger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,e( js, je+1 ), lde, f( is, &
                                    je+1 ), ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z * x = rhs
                       call stdlib${ii}$_dlaset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 5_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 5_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( isp1, is )
                       z( 7_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = a( is, isp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 7_${ik}$, 4_${ik}$ ) = d( is, isp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 5_${ik}$ ) = -b( js, jsp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 7_${ik}$, 5_${ik}$ ) = -e( js, jsp1 )
                       z( 2_${ik}$, 6_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 6_${ik}$ ) = -b( js, jsp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 8_${ik}$, 6_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 7_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 7_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 2_${ik}$, 8_${ik}$ ) = -b( jsp1, js )
                       z( 4_${ik}$, 8_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_dcopy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_dcopy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_dlatdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_dcopy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_dcopy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_dgemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, rhs( 1_${ik}$ &
                                    ), mb, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_dgemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, rhs( 1_${ik}$ &
                                    ), mb, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          k = mb*nb + 1_${ik}$
                          call stdlib${ii}$_dgemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, b( js, je+&
                                    1_${ik}$ ), ldb, one,c( is, je+1 ), ldc )
                          call stdlib${ii}$_dgemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, e( js, je+&
                                    1_${ik}$ ), lde, one,f( is, je+1 ), ldf )
                       end if
                    end if
                 end do loop_110
              end do loop_120
           else
              ! solve (i, j) - subsystem
                   ! a(i, i)**t * r(i, j) + d(i, i)**t * l(j, j)  =  c(i, j)
                   ! r(i, i)  * b(j, j) + l(i, j)  * e(j, j)  = -f(i, j)
              ! for i = 1, 2, ..., p, j = q, q - 1, ..., 1
              scale = one
              scaloc = one
              loop_200: do i = 1, p
                 is = iwork( i )
                 isp1 = is + 1_${ik}$
                 ie = iwork ( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_190: do j = q, p + 2, -1
                    js = iwork( j )
                    jsp1 = js + 1_${ik}$
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          alpha = rhs( 1_${ik}$ )
                          call stdlib${ii}$_daxpy( js-1, alpha, b( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                          alpha = rhs( 2_${ik}$ )
                          call stdlib${ii}$_daxpy( js-1, alpha, e( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                       end if
                       if( i<p ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_daxpy( m-ie, alpha, a( is, ie+1 ), lda,c( ie+1, js ), 1_${ik}$ )
                                    
                          alpha = -rhs( 2_${ik}$ )
                          call stdlib${ii}$_daxpy( m-ie, alpha, d( is, ie+1 ), ldd,c( ie+1, js ), 1_${ik}$ )
                                    
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = -b( js, jsp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = -e( js, jsp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_daxpy( js-1, rhs( 1_${ik}$ ), b( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_daxpy( js-1, rhs( 2_${ik}$ ), b( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_daxpy( js-1, rhs( 3_${ik}$ ), e( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_daxpy( js-1, rhs( 4_${ik}$ ), e( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_dger( m-ie, nb, -one, a( is, ie+1 ), lda,rhs( 1_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                          call stdlib${ii}$_dger( m-ie, nb, -one, d( is, ie+1 ), ldd,rhs( 3_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = d( is, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_dger( mb, js-1, one, rhs( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                          call stdlib${ii}$_dger( mb, js-1, one, rhs( 3_${ik}$ ), 1_${ik}$, e( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_dgemv( 'T', mb, m-ie, -one, a( is, ie+1 ),lda, rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                          call stdlib${ii}$_dgemv( 'T', mb, m-ie, -one, d( is, ie+1 ),ldd, rhs( 3_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z**t * x = rhs
                       call stdlib${ii}$_dlaset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 7_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 8_${ik}$, 2_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 7_${ik}$, 3_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = a( isp1, is )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 4_${ik}$ ) = -b( js, jsp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 5_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 2_${ik}$, 6_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 3_${ik}$, 7_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 7_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 7_${ik}$ ) = -e( js, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 4_${ik}$, 8_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 8_${ik}$ ) = -e( js, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_dcopy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_dcopy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_dgetc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_dgesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_dscal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_dscal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_dcopy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_dcopy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_dgemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1_${ik}$, &
                                    js ), ldb, one,f( is, 1_${ik}$ ), ldf )
                          call stdlib${ii}$_dgemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1_${ik}$, &
                                    js ), lde, one,f( is, 1_${ik}$ ), ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_dgemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( &
                                    is, js ), ldc,one, c( ie+1, js ), ldc )
                          call stdlib${ii}$_dgemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( &
                                    is, js ), ldf,one, c( ie+1, js ), ldc )
                       end if
                    end if
                 end do loop_190
              end do loop_200
           end if
           return
     end subroutine stdlib${ii}$_dtgsy2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! DTGSY2: solves the generalized Sylvester equation:
     !! A * R - L * B = scale * C                (1)
     !! D * R - L * E = scale * F,
     !! using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E)
     !! must be in generalized Schur canonical form, i.e. A, B are upper
     !! quasi triangular and D, E are upper triangular. The solution (R, L)
     !! overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor
     !! chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Z*x = scale*b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**T, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**T, Im) ],
     !! Ik is the identity matrix of size k and X**T is the transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! In the process of solving (1), we solve a number of such systems
     !! where Dim(In), Dim(In) = 1 or 2.
     !! If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y,
     !! which is equivalent to solve for R and L in
     !! A**T * R  + D**T * L   = scale * C           (3)
     !! R  * B**T + L  * E**T  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! sigma_min(Z) using reverse communication with DLACON.
     !! DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of the matrix pair in
     !! DTGSYL. See DTGSYL for details.
               ldf, scale, rdsum, rdscal,iwork, pq, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info, pq
           real(${rk}$), intent(inout) :: rdscal, rdsum
           real(${rk}$), intent(out) :: scale
           ! Array Arguments 
           integer(${ik}$), intent(out) :: iwork(*)
           real(${rk}$), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           real(${rk}$), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_${ri}$copy by calls to stdlib${ii}$_${ri}$laset.
        ! sven hammarling, 27/5/02.
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 8_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ie, ierr, ii, is, isp1, j, je, jj, js, jsp1, k, mb, nb, p, q, &
                     zdim
           real(${rk}$) :: alpha, scaloc
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           real(${rk}$) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'T' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGSY2', -info )
              return
           end if
           ! determine block structure of a
           pq = 0_${ik}$
           p = 0_${ik}$
           i = 1_${ik}$
           10 continue
           if( i>m )go to 20
           p = p + 1_${ik}$
           iwork( p ) = i
           if( i==m )go to 20
           if( a( i+1, i )/=zero ) then
              i = i + 2_${ik}$
           else
              i = i + 1_${ik}$
           end if
           go to 10
           20 continue
           iwork( p+1 ) = m + 1_${ik}$
           ! determine block structure of b
           q = p + 1_${ik}$
           j = 1_${ik}$
           30 continue
           if( j>n )go to 40
           q = q + 1_${ik}$
           iwork( q ) = j
           if( j==n )go to 40
           if( b( j+1, j )/=zero ) then
              j = j + 2_${ik}$
           else
              j = j + 1_${ik}$
           end if
           go to 30
           40 continue
           iwork( q+1 ) = n + 1_${ik}$
           pq = p*( q-p-1 )
           if( notran ) then
              ! solve (i, j) - subsystem
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = p, p - 1, ..., 1; j = 1, 2, ..., q
              scale = one
              scaloc = one
              loop_120: do j = p + 2, q
                 js = iwork( j )
                 jsp1 = js + 1_${ik}$
                 je = iwork( j+1 ) - 1_${ik}$
                 nb = je - js + 1_${ik}$
                 loop_110: do i = p, 1, -1
                    is = iwork( i )
                    isp1 = is + 1_${ik}$
                    ie = iwork( i+1 ) - 1_${ik}$
                    mb = ie - is + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_${ri}$latdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( is-1, alpha, a( 1_${ik}$, is ), 1_${ik}$, c( 1_${ik}$, js ),1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( is-1, alpha, d( 1_${ik}$, is ), 1_${ik}$, f( 1_${ik}$, js ),1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 2_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 2_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 4_${ik}$ ) = -b( jsp1, js )
                       z( 2_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_${ri}$latdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$ger( is-1, nb, -one, a( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, c( 1_${ik}$, js ),&
                                     ldc )
                          call stdlib${ii}$_${ri}$ger( is-1, nb, -one, d( 1_${ik}$, is ), 1_${ik}$, rhs( 1_${ik}$ ),1_${ik}$, f( 1_${ik}$, js ),&
                                     ldf )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 3_${ik}$ ), b( js, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 3_${ik}$ ), e( js, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 4_${ik}$ ), b( jsp1, je+1 ), ldb,c( is, je+1 ), &
                                    ldc )
                          call stdlib${ii}$_${ri}$axpy( n-je, rhs( 4_${ik}$ ), e( jsp1, je+1 ), lde,f( is, je+1 ), &
                                    ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 3_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = -b( js, js )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_${ri}$latdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gemv( 'N', is-1, mb, -one, a( 1_${ik}$, is ), lda,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( 1_${ik}$, js ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$gemv( 'N', is-1, mb, -one, d( 1_${ik}$, is ), ldd,rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, f( 1_${ik}$, js ), 1_${ik}$ )
                       end if
                       if( j<q ) then
                          call stdlib${ii}$_${ri}$ger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,b( js, je+1 ), ldb, c( is, &
                                    je+1 ), ldc )
                          call stdlib${ii}$_${ri}$ger( mb, n-je, one, rhs( 3_${ik}$ ), 1_${ik}$,e( js, je+1 ), lde, f( is, &
                                    je+1 ), ldf )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z * x = rhs
                       call stdlib${ii}$_${ri}$laset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( isp1, is )
                       z( 5_${ik}$, 1_${ik}$ ) = d( is, is )
                       z( 1_${ik}$, 2_${ik}$ ) = a( is, isp1 )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 5_${ik}$, 2_${ik}$ ) = d( is, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( isp1, is )
                       z( 7_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = a( is, isp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 7_${ik}$, 4_${ik}$ ) = d( is, isp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = -b( js, js )
                       z( 3_${ik}$, 5_${ik}$ ) = -b( js, jsp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 7_${ik}$, 5_${ik}$ ) = -e( js, jsp1 )
                       z( 2_${ik}$, 6_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 6_${ik}$ ) = -b( js, jsp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 8_${ik}$, 6_${ik}$ ) = -e( js, jsp1 )
                       z( 1_${ik}$, 7_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 7_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 2_${ik}$, 8_${ik}$ ) = -b( jsp1, js )
                       z( 4_${ik}$, 8_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_${ri}$copy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$copy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       if( ijob==0_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv,scaloc )
                          if( scaloc/=one ) then
                             do k = 1, n
                                call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                                call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                             end do
                             scale = scale*scaloc
                          end if
                       else
                          call stdlib${ii}$_${ri}$latdf( ijob, zdim, z, ldz, rhs, rdsum,rdscal, ipiv, jpiv )
                                    
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_${ri}$copy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$copy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( i>1_${ik}$ ) then
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', is-1, nb, mb, -one,a( 1_${ik}$, is ), lda, rhs( 1_${ik}$ &
                                    ), mb, one,c( 1_${ik}$, js ), ldc )
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', is-1, nb, mb, -one,d( 1_${ik}$, is ), ldd, rhs( 1_${ik}$ &
                                    ), mb, one,f( 1_${ik}$, js ), ldf )
                       end if
                       if( j<q ) then
                          k = mb*nb + 1_${ik}$
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, b( js, je+&
                                    1_${ik}$ ), ldb, one,c( is, je+1 ), ldc )
                          call stdlib${ii}$_${ri}$gemm( 'N', 'N', mb, n-je, nb, one, rhs( k ),mb, e( js, je+&
                                    1_${ik}$ ), lde, one,f( is, je+1 ), ldf )
                       end if
                    end if
                 end do loop_110
              end do loop_120
           else
              ! solve (i, j) - subsystem
                   ! a(i, i)**t * r(i, j) + d(i, i)**t * l(j, j)  =  c(i, j)
                   ! r(i, i)  * b(j, j) + l(i, j)  * e(j, j)  = -f(i, j)
              ! for i = 1, 2, ..., p, j = q, q - 1, ..., 1
              scale = one
              scaloc = one
              loop_200: do i = 1, p
                 is = iwork( i )
                 isp1 = is + 1_${ik}$
                 ie = iwork ( i+1 ) - 1_${ik}$
                 mb = ie - is + 1_${ik}$
                 loop_190: do j = q, p + 2, -1
                    js = iwork( j )
                    jsp1 = js + 1_${ik}$
                    je = iwork( j+1 ) - 1_${ik}$
                    nb = je - js + 1_${ik}$
                    zdim = mb*nb*2_${ik}$
                    if( ( mb==1_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 2-by-2 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 2_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 2_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = f( is, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       f( is, js ) = rhs( 2_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          alpha = rhs( 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( js-1, alpha, b( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                          alpha = rhs( 2_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( js-1, alpha, e( 1_${ik}$, js ), 1_${ik}$, f( is, 1_${ik}$ ),ldf )
                       end if
                       if( i<p ) then
                          alpha = -rhs( 1_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( m-ie, alpha, a( is, ie+1 ), lda,c( ie+1, js ), 1_${ik}$ )
                                    
                          alpha = -rhs( 2_${ik}$ )
                          call stdlib${ii}$_${ri}$axpy( m-ie, alpha, d( is, ie+1 ), ldd,c( ie+1, js ), 1_${ik}$ )
                                    
                       end if
                    else if( ( mb==1_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = zero
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = zero
                       z( 2_${ik}$, 2_${ik}$ ) = a( is, is )
                       z( 3_${ik}$, 2_${ik}$ ) = -b( js, jsp1 )
                       z( 4_${ik}$, 2_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = zero
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( is, is )
                       z( 3_${ik}$, 4_${ik}$ ) = -e( js, jsp1 )
                       z( 4_${ik}$, 4_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( is, jsp1 )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( is, jsp1 )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( is, jsp1 ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( is, jsp1 ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_${ri}$axpy( js-1, rhs( 1_${ik}$ ), b( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_${ri}$axpy( js-1, rhs( 2_${ik}$ ), b( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_${ri}$axpy( js-1, rhs( 3_${ik}$ ), e( 1_${ik}$, js ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                          call stdlib${ii}$_${ri}$axpy( js-1, rhs( 4_${ik}$ ), e( 1_${ik}$, jsp1 ), 1_${ik}$,f( is, 1_${ik}$ ), ldf )
                                    
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_${ri}$ger( m-ie, nb, -one, a( is, ie+1 ), lda,rhs( 1_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                          call stdlib${ii}$_${ri}$ger( m-ie, nb, -one, d( is, ie+1 ), ldd,rhs( 3_${ik}$ ), 1_${ik}$, c( ie+&
                                    1_${ik}$, js ), ldc )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==1_${ik}$ ) ) then
                       ! build a 4-by-4 system z**t * x = rhs
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 3_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 4_${ik}$, 1_${ik}$ ) = zero
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 3_${ik}$, 2_${ik}$ ) = zero
                       z( 4_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 1_${ik}$, 3_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 3_${ik}$ ) = d( is, isp1 )
                       z( 3_${ik}$, 3_${ik}$ ) = -e( js, js )
                       z( 4_${ik}$, 3_${ik}$ ) = zero
                       z( 1_${ik}$, 4_${ik}$ ) = zero
                       z( 2_${ik}$, 4_${ik}$ ) = d( isp1, isp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = zero
                       z( 4_${ik}$, 4_${ik}$ ) = -e( js, js )
                       ! set up right hand side(s)
                       rhs( 1_${ik}$ ) = c( is, js )
                       rhs( 2_${ik}$ ) = c( isp1, js )
                       rhs( 3_${ik}$ ) = f( is, js )
                       rhs( 4_${ik}$ ) = f( isp1, js )
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       c( is, js ) = rhs( 1_${ik}$ )
                       c( isp1, js ) = rhs( 2_${ik}$ )
                       f( is, js ) = rhs( 3_${ik}$ )
                       f( isp1, js ) = rhs( 4_${ik}$ )
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_${ri}$ger( mb, js-1, one, rhs( 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                          call stdlib${ii}$_${ri}$ger( mb, js-1, one, rhs( 3_${ik}$ ), 1_${ik}$, e( 1_${ik}$, js ),1_${ik}$, f( is, 1_${ik}$ ), &
                                    ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_${ri}$gemv( 'T', mb, m-ie, -one, a( is, ie+1 ),lda, rhs( 1_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                          call stdlib${ii}$_${ri}$gemv( 'T', mb, m-ie, -one, d( is, ie+1 ),ldd, rhs( 3_${ik}$ ), 1_${ik}$, &
                                    one, c( ie+1, js ),1_${ik}$ )
                       end if
                    else if( ( mb==2_${ik}$ ) .and. ( nb==2_${ik}$ ) ) then
                       ! build an 8-by-8 system z**t * x = rhs
                       call stdlib${ii}$_${ri}$laset( 'F', ldz, ldz, zero, zero, z, ldz )
                       z( 1_${ik}$, 1_${ik}$ ) = a( is, is )
                       z( 2_${ik}$, 1_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 1_${ik}$ ) = -b( js, js )
                       z( 7_${ik}$, 1_${ik}$ ) = -b( jsp1, js )
                       z( 1_${ik}$, 2_${ik}$ ) = a( isp1, is )
                       z( 2_${ik}$, 2_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 2_${ik}$ ) = -b( js, js )
                       z( 8_${ik}$, 2_${ik}$ ) = -b( jsp1, js )
                       z( 3_${ik}$, 3_${ik}$ ) = a( is, is )
                       z( 4_${ik}$, 3_${ik}$ ) = a( is, isp1 )
                       z( 5_${ik}$, 3_${ik}$ ) = -b( js, jsp1 )
                       z( 7_${ik}$, 3_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 3_${ik}$, 4_${ik}$ ) = a( isp1, is )
                       z( 4_${ik}$, 4_${ik}$ ) = a( isp1, isp1 )
                       z( 6_${ik}$, 4_${ik}$ ) = -b( js, jsp1 )
                       z( 8_${ik}$, 4_${ik}$ ) = -b( jsp1, jsp1 )
                       z( 1_${ik}$, 5_${ik}$ ) = d( is, is )
                       z( 2_${ik}$, 5_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 5_${ik}$ ) = -e( js, js )
                       z( 2_${ik}$, 6_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 6_${ik}$ ) = -e( js, js )
                       z( 3_${ik}$, 7_${ik}$ ) = d( is, is )
                       z( 4_${ik}$, 7_${ik}$ ) = d( is, isp1 )
                       z( 5_${ik}$, 7_${ik}$ ) = -e( js, jsp1 )
                       z( 7_${ik}$, 7_${ik}$ ) = -e( jsp1, jsp1 )
                       z( 4_${ik}$, 8_${ik}$ ) = d( isp1, isp1 )
                       z( 6_${ik}$, 8_${ik}$ ) = -e( js, jsp1 )
                       z( 8_${ik}$, 8_${ik}$ ) = -e( jsp1, jsp1 )
                       ! set up right hand side(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_${ri}$copy( mb, c( is, js+jj ), 1_${ik}$, rhs( k ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$copy( mb, f( is, js+jj ), 1_${ik}$, rhs( ii ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! solve z**t * x = rhs
                       call stdlib${ii}$_${ri}$getc2( zdim, z, ldz, ipiv, jpiv, ierr )
                       if( ierr>0_${ik}$ )info = ierr
                       call stdlib${ii}$_${ri}$gesc2( zdim, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_${ri}$scal( m, scaloc, c( 1_${ik}$, k ), 1_${ik}$ )
                             call stdlib${ii}$_${ri}$scal( m, scaloc, f( 1_${ik}$, k ), 1_${ik}$ )
                          end do
                          scale = scale*scaloc
                       end if
                       ! unpack solution vector(s)
                       k = 1_${ik}$
                       ii = mb*nb + 1_${ik}$
                       do jj = 0, nb - 1
                          call stdlib${ii}$_${ri}$copy( mb, rhs( k ), 1_${ik}$, c( is, js+jj ), 1_${ik}$ )
                          call stdlib${ii}$_${ri}$copy( mb, rhs( ii ), 1_${ik}$, f( is, js+jj ), 1_${ik}$ )
                          k = k + mb
                          ii = ii + mb
                       end do
                       ! substitute r(i, j) and l(i, j) into remaining
                       ! equation.
                       if( j>p+2 ) then
                          call stdlib${ii}$_${ri}$gemm( 'N', 'T', mb, js-1, nb, one,c( is, js ), ldc, b( 1_${ik}$, &
                                    js ), ldb, one,f( is, 1_${ik}$ ), ldf )
                          call stdlib${ii}$_${ri}$gemm( 'N', 'T', mb, js-1, nb, one,f( is, js ), ldf, e( 1_${ik}$, &
                                    js ), lde, one,f( is, 1_${ik}$ ), ldf )
                       end if
                       if( i<p ) then
                          call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-ie, nb, mb, -one,a( is, ie+1 ), lda, c( &
                                    is, js ), ldc,one, c( ie+1, js ), ldc )
                          call stdlib${ii}$_${ri}$gemm( 'T', 'N', m-ie, nb, mb, -one,d( is, ie+1 ), ldd, f( &
                                    is, js ), ldf,one, c( ie+1, js ), ldc )
                       end if
                    end if
                 end do loop_190
              end do loop_200
           end if
           return
     end subroutine stdlib${ii}$_${ri}$tgsy2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! CTGSY2 solves the generalized Sylvester equation
     !! A * R - L * B = scale *  C               (1)
     !! D * R - L * E = scale * F
     !! using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively. A, B, D and E are upper triangular
     !! (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Zx = scale * b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Ik is the identity matrix of size k and X**H is the transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R  + D**H * L   = scale * C           (3)
     !! R  * B**H + L  * E**H  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! = sigma_min(Z) using reverse communication with CLACON.
     !! CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of two matrix pairs in
     !! CTGSYL.
               ldf, scale, rdsum, rdscal,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info
           real(sp), intent(inout) :: rdscal, rdsum
           real(sp), intent(out) :: scale
           ! Array Arguments 
           complex(sp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(sp), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, k
           real(sp) :: scaloc
           complex(sp) :: alpha
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           complex(sp) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGSY2', -info )
              return
           end if
           if( notran ) then
              ! solve (i, j) - system
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = m, m - 1, ..., 1; j = 1, 2, ..., n
              scale = one
              scaloc = one
              loop_30: do j = 1, n
                 loop_20: do i = m, 1, -1
                    ! build 2 by 2 system
                    z( 1_${ik}$, 1_${ik}$ ) = a( i, i )
                    z( 2_${ik}$, 1_${ik}$ ) = d( i, i )
                    z( 1_${ik}$, 2_${ik}$ ) = -b( j, j )
                    z( 2_${ik}$, 2_${ik}$ ) = -e( j, j )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z * x = rhs
                    call stdlib${ii}$_cgetc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    if( ijob==0_${ik}$ ) then
                       call stdlib${ii}$_cgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                       
                             call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                    else
                       call stdlib${ii}$_clatdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv )
                                 
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( i>1_${ik}$ ) then
                       alpha = -rhs( 1_${ik}$ )
                       call stdlib${ii}$_caxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, c( 1_${ik}$, j ), 1_${ik}$ )
                       call stdlib${ii}$_caxpy( i-1, alpha, d( 1_${ik}$, i ), 1_${ik}$, f( 1_${ik}$, j ), 1_${ik}$ )
                    end if
                    if( j<n ) then
                       call stdlib${ii}$_caxpy( n-j, rhs( 2_${ik}$ ), b( j, j+1 ), ldb,c( i, j+1 ), ldc )
                                 
                       call stdlib${ii}$_caxpy( n-j, rhs( 2_${ik}$ ), e( j, j+1 ), lde,f( i, j+1 ), ldf )
                                 
                    end if
                 end do loop_20
              end do loop_30
           else
              ! solve transposed (i, j) - system:
                 ! a(i, i)**h * r(i, j) + d(i, i)**h * l(j, j) = c(i, j)
                 ! r(i, i) * b(j, j) + l(i, j) * e(j, j)   = -f(i, j)
              ! for i = 1, 2, ..., m, j = n, n - 1, ..., 1
              scale = one
              scaloc = one
              loop_80: do i = 1, m
                 loop_70: do j = n, 1, -1
                    ! build 2 by 2 system z**h
                    z( 1_${ik}$, 1_${ik}$ ) = conjg( a( i, i ) )
                    z( 2_${ik}$, 1_${ik}$ ) = -conjg( b( j, j ) )
                    z( 1_${ik}$, 2_${ik}$ ) = conjg( d( i, i ) )
                    z( 2_${ik}$, 2_${ik}$ ) = -conjg( e( j, j ) )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z**h * x = rhs
                    call stdlib${ii}$_cgetc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    call stdlib${ii}$_cgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                    if( scaloc/=one ) then
                       do k = 1, n
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_cscal( m, cmplx( scaloc, zero,KIND=sp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    do k = 1, j - 1
                       f( i, k ) = f( i, k ) + rhs( 1_${ik}$ )*conjg( b( k, j ) ) +rhs( 2_${ik}$ )*conjg( e( k, &
                                 j ) )
                    end do
                    do k = i + 1, m
                       c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1_${ik}$ ) -conjg( d( i, k ) )&
                                 *rhs( 2_${ik}$ )
                    end do
                 end do loop_70
              end do loop_80
           end if
           return
     end subroutine stdlib${ii}$_ctgsy2

     pure module subroutine stdlib${ii}$_ztgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! ZTGSY2 solves the generalized Sylvester equation
     !! A * R - L * B = scale * C               (1)
     !! D * R - L * E = scale * F
     !! using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively. A, B, D and E are upper triangular
     !! (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Zx = scale * b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Ik is the identity matrix of size k and X**H is the conjuguate transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R  + D**H * L   = scale * C           (3)
     !! R  * B**H + L  * E**H  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! = sigma_min(Z) using reverse communication with ZLACON.
     !! ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of two matrix pairs in
     !! ZTGSYL.
               ldf, scale, rdsum, rdscal,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info
           real(dp), intent(inout) :: rdscal, rdsum
           real(dp), intent(out) :: scale
           ! Array Arguments 
           complex(dp), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(dp), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, k
           real(dp) :: scaloc
           complex(dp) :: alpha
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           complex(dp) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSY2', -info )
              return
           end if
           if( notran ) then
              ! solve (i, j) - system
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = m, m - 1, ..., 1; j = 1, 2, ..., n
              scale = one
              scaloc = one
              loop_30: do j = 1, n
                 loop_20: do i = m, 1, -1
                    ! build 2 by 2 system
                    z( 1_${ik}$, 1_${ik}$ ) = a( i, i )
                    z( 2_${ik}$, 1_${ik}$ ) = d( i, i )
                    z( 1_${ik}$, 2_${ik}$ ) = -b( j, j )
                    z( 2_${ik}$, 2_${ik}$ ) = -e( j, j )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z * x = rhs
                    call stdlib${ii}$_zgetc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    if( ijob==0_${ik}$ ) then
                       call stdlib${ii}$_zgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                    else
                       call stdlib${ii}$_zlatdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv )
                                 
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( i>1_${ik}$ ) then
                       alpha = -rhs( 1_${ik}$ )
                       call stdlib${ii}$_zaxpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, c( 1_${ik}$, j ), 1_${ik}$ )
                       call stdlib${ii}$_zaxpy( i-1, alpha, d( 1_${ik}$, i ), 1_${ik}$, f( 1_${ik}$, j ), 1_${ik}$ )
                    end if
                    if( j<n ) then
                       call stdlib${ii}$_zaxpy( n-j, rhs( 2_${ik}$ ), b( j, j+1 ), ldb,c( i, j+1 ), ldc )
                                 
                       call stdlib${ii}$_zaxpy( n-j, rhs( 2_${ik}$ ), e( j, j+1 ), lde,f( i, j+1 ), ldf )
                                 
                    end if
                 end do loop_20
              end do loop_30
           else
              ! solve transposed (i, j) - system:
                 ! a(i, i)**h * r(i, j) + d(i, i)**h * l(j, j) = c(i, j)
                 ! r(i, i) * b(j, j) + l(i, j) * e(j, j)   = -f(i, j)
              ! for i = 1, 2, ..., m, j = n, n - 1, ..., 1
              scale = one
              scaloc = one
              loop_80: do i = 1, m
                 loop_70: do j = n, 1, -1
                    ! build 2 by 2 system z**h
                    z( 1_${ik}$, 1_${ik}$ ) = conjg( a( i, i ) )
                    z( 2_${ik}$, 1_${ik}$ ) = -conjg( b( j, j ) )
                    z( 1_${ik}$, 2_${ik}$ ) = conjg( d( i, i ) )
                    z( 2_${ik}$, 2_${ik}$ ) = -conjg( e( j, j ) )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z**h * x = rhs
                    call stdlib${ii}$_zgetc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    call stdlib${ii}$_zgesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                    if( scaloc/=one ) then
                       do k = 1, n
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_zscal( m, cmplx( scaloc, zero,KIND=dp), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    do k = 1, j - 1
                       f( i, k ) = f( i, k ) + rhs( 1_${ik}$ )*conjg( b( k, j ) ) +rhs( 2_${ik}$ )*conjg( e( k, &
                                 j ) )
                    end do
                    do k = i + 1, m
                       c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1_${ik}$ ) -conjg( d( i, k ) )&
                                 *rhs( 2_${ik}$ )
                    end do
                 end do loop_70
              end do loop_80
           end if
           return
     end subroutine stdlib${ii}$_ztgsy2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgsy2( trans, ijob, m, n, a, lda, b, ldb, c, ldc, d,ldd, e, lde, f, &
     !! ZTGSY2: solves the generalized Sylvester equation
     !! A * R - L * B = scale * C               (1)
     !! D * R - L * E = scale * F
     !! using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices,
     !! (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
     !! N-by-N and M-by-N, respectively. A, B, D and E are upper triangular
     !! (i.e., (A,D) and (B,E) in generalized Schur form).
     !! The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
     !! scaling factor chosen to avoid overflow.
     !! In matrix notation solving equation (1) corresponds to solve
     !! Zx = scale * b, where Z is defined as
     !! Z = [ kron(In, A)  -kron(B**H, Im) ]             (2)
     !! [ kron(In, D)  -kron(E**H, Im) ],
     !! Ik is the identity matrix of size k and X**H is the conjuguate transpose of X.
     !! kron(X, Y) is the Kronecker product between the matrices X and Y.
     !! If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b
     !! is solved for, which is equivalent to solve for R and L in
     !! A**H * R  + D**H * L   = scale * C           (3)
     !! R  * B**H + L  * E**H  = scale * -F
     !! This case is used to compute an estimate of Dif[(A, D), (B, E)] =
     !! = sigma_min(Z) using reverse communication with ZLACON.
     !! ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL
     !! of an upper bound on the separation between to matrix pairs. Then
     !! the input (A, D), (B, E) are sub-pencils of two matrix pairs in
     !! ZTGSYL.
               ldf, scale, rdsum, rdscal,info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: trans
           integer(${ik}$), intent(in) :: ijob, lda, ldb, ldc, ldd, lde, ldf, m, n
           integer(${ik}$), intent(out) :: info
           real(${ck}$), intent(inout) :: rdscal, rdsum
           real(${ck}$), intent(out) :: scale
           ! Array Arguments 
           complex(${ck}$), intent(in) :: a(lda,*), b(ldb,*), d(ldd,*), e(lde,*)
           complex(${ck}$), intent(inout) :: c(ldc,*), f(ldf,*)
        ! =====================================================================
           ! Parameters 
           integer(${ik}$), parameter :: ldz = 2_${ik}$
           
           
           ! Local Scalars 
           logical(lk) :: notran
           integer(${ik}$) :: i, ierr, j, k
           real(${ck}$) :: scaloc
           complex(${ck}$) :: alpha
           ! Local Arrays 
           integer(${ik}$) :: ipiv(ldz), jpiv(ldz)
           complex(${ck}$) :: rhs(ldz), z(ldz,ldz)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input parameters
           info = 0_${ik}$
           ierr = 0_${ik}$
           notran = stdlib_lsame( trans, 'N' )
           if( .not.notran .and. .not.stdlib_lsame( trans, 'C' ) ) then
              info = -1_${ik}$
           else if( notran ) then
              if( ( ijob<0_${ik}$ ) .or. ( ijob>2_${ik}$ ) ) then
                 info = -2_${ik}$
              end if
           end if
           if( info==0_${ik}$ ) then
              if( m<=0_${ik}$ ) then
                 info = -3_${ik}$
              else if( n<=0_${ik}$ ) then
                 info = -4_${ik}$
              else if( lda<max( 1_${ik}$, m ) ) then
                 info = -6_${ik}$
              else if( ldb<max( 1_${ik}$, n ) ) then
                 info = -8_${ik}$
              else if( ldc<max( 1_${ik}$, m ) ) then
                 info = -10_${ik}$
              else if( ldd<max( 1_${ik}$, m ) ) then
                 info = -12_${ik}$
              else if( lde<max( 1_${ik}$, n ) ) then
                 info = -14_${ik}$
              else if( ldf<max( 1_${ik}$, m ) ) then
                 info = -16_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGSY2', -info )
              return
           end if
           if( notran ) then
              ! solve (i, j) - system
                 ! a(i, i) * r(i, j) - l(i, j) * b(j, j) = c(i, j)
                 ! d(i, i) * r(i, j) - l(i, j) * e(j, j) = f(i, j)
              ! for i = m, m - 1, ..., 1; j = 1, 2, ..., n
              scale = one
              scaloc = one
              loop_30: do j = 1, n
                 loop_20: do i = m, 1, -1
                    ! build 2 by 2 system
                    z( 1_${ik}$, 1_${ik}$ ) = a( i, i )
                    z( 2_${ik}$, 1_${ik}$ ) = d( i, i )
                    z( 1_${ik}$, 2_${ik}$ ) = -b( j, j )
                    z( 2_${ik}$, 2_${ik}$ ) = -e( j, j )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z * x = rhs
                    call stdlib${ii}$_${ci}$getc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    if( ijob==0_${ik}$ ) then
                       call stdlib${ii}$_${ci}$gesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                       if( scaloc/=one ) then
                          do k = 1, n
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),c( 1_${ik}$, k ), 1_${ik}$ )
                                       
                             call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$),f( 1_${ik}$, k ), 1_${ik}$ )
                                       
                          end do
                          scale = scale*scaloc
                       end if
                    else
                       call stdlib${ii}$_${ci}$latdf( ijob, ldz, z, ldz, rhs, rdsum, rdscal,ipiv, jpiv )
                                 
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    if( i>1_${ik}$ ) then
                       alpha = -rhs( 1_${ik}$ )
                       call stdlib${ii}$_${ci}$axpy( i-1, alpha, a( 1_${ik}$, i ), 1_${ik}$, c( 1_${ik}$, j ), 1_${ik}$ )
                       call stdlib${ii}$_${ci}$axpy( i-1, alpha, d( 1_${ik}$, i ), 1_${ik}$, f( 1_${ik}$, j ), 1_${ik}$ )
                    end if
                    if( j<n ) then
                       call stdlib${ii}$_${ci}$axpy( n-j, rhs( 2_${ik}$ ), b( j, j+1 ), ldb,c( i, j+1 ), ldc )
                                 
                       call stdlib${ii}$_${ci}$axpy( n-j, rhs( 2_${ik}$ ), e( j, j+1 ), lde,f( i, j+1 ), ldf )
                                 
                    end if
                 end do loop_20
              end do loop_30
           else
              ! solve transposed (i, j) - system:
                 ! a(i, i)**h * r(i, j) + d(i, i)**h * l(j, j) = c(i, j)
                 ! r(i, i) * b(j, j) + l(i, j) * e(j, j)   = -f(i, j)
              ! for i = 1, 2, ..., m, j = n, n - 1, ..., 1
              scale = one
              scaloc = one
              loop_80: do i = 1, m
                 loop_70: do j = n, 1, -1
                    ! build 2 by 2 system z**h
                    z( 1_${ik}$, 1_${ik}$ ) = conjg( a( i, i ) )
                    z( 2_${ik}$, 1_${ik}$ ) = -conjg( b( j, j ) )
                    z( 1_${ik}$, 2_${ik}$ ) = conjg( d( i, i ) )
                    z( 2_${ik}$, 2_${ik}$ ) = -conjg( e( j, j ) )
                    ! set up right hand side(s)
                    rhs( 1_${ik}$ ) = c( i, j )
                    rhs( 2_${ik}$ ) = f( i, j )
                    ! solve z**h * x = rhs
                    call stdlib${ii}$_${ci}$getc2( ldz, z, ldz, ipiv, jpiv, ierr )
                    if( ierr>0_${ik}$ )info = ierr
                    call stdlib${ii}$_${ci}$gesc2( ldz, z, ldz, rhs, ipiv, jpiv, scaloc )
                    if( scaloc/=one ) then
                       do k = 1, n
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), c( 1_${ik}$, k ),1_${ik}$ )
                                    
                          call stdlib${ii}$_${ci}$scal( m, cmplx( scaloc, zero,KIND=${ck}$), f( 1_${ik}$, k ),1_${ik}$ )
                                    
                       end do
                       scale = scale*scaloc
                    end if
                    ! unpack solution vector(s)
                    c( i, j ) = rhs( 1_${ik}$ )
                    f( i, j ) = rhs( 2_${ik}$ )
                    ! substitute r(i, j) and l(i, j) into remaining equation.
                    do k = 1, j - 1
                       f( i, k ) = f( i, k ) + rhs( 1_${ik}$ )*conjg( b( k, j ) ) +rhs( 2_${ik}$ )*conjg( e( k, &
                                 j ) )
                    end do
                    do k = i + 1, m
                       c( k, j ) = c( k, j ) - conjg( a( i, k ) )*rhs( 1_${ik}$ ) -conjg( d( i, k ) )&
                                 *rhs( 2_${ik}$ )
                    end do
                 end do loop_70
              end do loop_80
           end if
           return
     end subroutine stdlib${ii}$_${ci}$tgsy2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_slagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr )
     !! SLAGV2 computes the Generalized Schur factorization of a real 2-by-2
     !! matrix pencil (A,B) where B is upper triangular. This routine
     !! computes orthogonal (rotation) matrices given by CSL, SNL and CSR,
     !! SNR such that
     !! 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0
     !! types), then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],
     !! 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,
     !! then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]
     !! where b11 >= b22 > 0.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(sp), intent(out) :: csl, csr, snl, snr
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*)
           real(sp), intent(out) :: alphai(2_${ik}$), alphar(2_${ik}$), beta(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(sp) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, &
                     scale2, t, ulp, wi, wr1, wr2
           ! Intrinsic Functions 
           ! Executable Statements 
           safmin = stdlib${ii}$_slamch( 'S' )
           ulp = stdlib${ii}$_slamch( 'P' )
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a( 1_${ik}$, 1_${ik}$ ) = ascale*a( 1_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = ascale*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = ascale*a( 2_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! scale b
           bnorm = max( abs( b( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 2_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ),safmin )
           bscale = one / bnorm
           b( 1_${ik}$, 1_${ik}$ ) = bscale*b( 1_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bscale*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bscale*b( 2_${ik}$, 2_${ik}$ )
           ! check if a can be deflated
           if( abs( a( 2_${ik}$, 1_${ik}$ ) )<=ulp ) then
              csl = one
              snl = zero
              csr = one
              snr = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           ! check if b is singular
           else if( abs( b( 1_${ik}$, 1_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_slartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
              csr = one
              snr = zero
              call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
              call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 1_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           else if( abs( b( 2_${ik}$, 2_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_slartg( a( 2_${ik}$, 2_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
              snr = -snr
              call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              csl = one
              snl = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 2_${ik}$ ) = zero
              wi = zero
           else
              ! b is nonsingular, first compute the eigenvalues of (a,b)
              call stdlib${ii}$_slag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi )
              if( wi==zero ) then
                 ! two real eigenvalues, compute s*a-w*b
                 h1 = scale1*a( 1_${ik}$, 1_${ik}$ ) - wr1*b( 1_${ik}$, 1_${ik}$ )
                 h2 = scale1*a( 1_${ik}$, 2_${ik}$ ) - wr1*b( 1_${ik}$, 2_${ik}$ )
                 h3 = scale1*a( 2_${ik}$, 2_${ik}$ ) - wr1*b( 2_${ik}$, 2_${ik}$ )
                 rr = stdlib${ii}$_slapy2( h1, h2 )
                 qq = stdlib${ii}$_slapy2( scale1*a( 2_${ik}$, 1_${ik}$ ), h3 )
                 if( rr>qq ) then
                    ! find right rotation matrix to zero 1,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_slartg( h2, h1, csr, snr, t )
                 else
                    ! find right rotation matrix to zero 2,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_slartg( h3, scale1*a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
                 end if
                 snr = -snr
                 call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 ! compute inf norms of a and b
                 h1 = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 1_${ik}$, 2_${ik}$ ) ),abs( a( 2_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 h2 = max( abs( b( 1_${ik}$, 1_${ik}$ ) )+abs( b( 1_${ik}$, 2_${ik}$ ) ),abs( b( 2_${ik}$, 1_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 if( ( scale1*h1 )>=abs( wr1 )*h2 ) then
                    ! find left rotation matrix q to zero out b(2,1)
                    call stdlib${ii}$_slartg( b( 1_${ik}$, 1_${ik}$ ), b( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 else
                    ! find left rotation matrix q to zero out a(2,1)
                    call stdlib${ii}$_slartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 end if
                 call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 a( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 2_${ik}$, 1_${ik}$ ) = zero
              else
                 ! a pair of complex conjugate eigenvalues
                 ! first compute the svd of the matrix b
                 call stdlib${ii}$_slasv2( b( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 2_${ik}$ ), b( 2_${ik}$, 2_${ik}$ ), r, t, snr,csr, snl, csl )
                           
                 ! form (a,b) := q(a,b)z**t where q is left rotation matrix and
                 ! z is right rotation matrix computed from stdlib${ii}$_slasv2
                 call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 call stdlib${ii}$_srot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_srot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 b( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 1_${ik}$, 2_${ik}$ ) = zero
              end if
           end if
           ! unscaling
           a( 1_${ik}$, 1_${ik}$ ) = anorm*a( 1_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = anorm*a( 2_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = anorm*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = anorm*a( 2_${ik}$, 2_${ik}$ )
           b( 1_${ik}$, 1_${ik}$ ) = bnorm*b( 1_${ik}$, 1_${ik}$ )
           b( 2_${ik}$, 1_${ik}$ ) = bnorm*b( 2_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bnorm*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bnorm*b( 2_${ik}$, 2_${ik}$ )
           if( wi==zero ) then
              alphar( 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ )
              alphar( 2_${ik}$ ) = a( 2_${ik}$, 2_${ik}$ )
              alphai( 1_${ik}$ ) = zero
              alphai( 2_${ik}$ ) = zero
              beta( 1_${ik}$ ) = b( 1_${ik}$, 1_${ik}$ )
              beta( 2_${ik}$ ) = b( 2_${ik}$, 2_${ik}$ )
           else
              alphar( 1_${ik}$ ) = anorm*wr1 / scale1 / bnorm
              alphai( 1_${ik}$ ) = anorm*wi / scale1 / bnorm
              alphar( 2_${ik}$ ) = alphar( 1_${ik}$ )
              alphai( 2_${ik}$ ) = -alphai( 1_${ik}$ )
              beta( 1_${ik}$ ) = one
              beta( 2_${ik}$ ) = one
           end if
           return
     end subroutine stdlib${ii}$_slagv2

     pure module subroutine stdlib${ii}$_dlagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr )
     !! DLAGV2 computes the Generalized Schur factorization of a real 2-by-2
     !! matrix pencil (A,B) where B is upper triangular. This routine
     !! computes orthogonal (rotation) matrices given by CSL, SNL and CSR,
     !! SNR such that
     !! 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0
     !! types), then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],
     !! 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,
     !! then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]
     !! where b11 >= b22 > 0.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(dp), intent(out) :: csl, csr, snl, snr
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*)
           real(dp), intent(out) :: alphai(2_${ik}$), alphar(2_${ik}$), beta(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(dp) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, &
                     scale2, t, ulp, wi, wr1, wr2
           ! Intrinsic Functions 
           ! Executable Statements 
           safmin = stdlib${ii}$_dlamch( 'S' )
           ulp = stdlib${ii}$_dlamch( 'P' )
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a( 1_${ik}$, 1_${ik}$ ) = ascale*a( 1_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = ascale*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = ascale*a( 2_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! scale b
           bnorm = max( abs( b( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 2_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ),safmin )
           bscale = one / bnorm
           b( 1_${ik}$, 1_${ik}$ ) = bscale*b( 1_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bscale*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bscale*b( 2_${ik}$, 2_${ik}$ )
           ! check if a can be deflated
           if( abs( a( 2_${ik}$, 1_${ik}$ ) )<=ulp ) then
              csl = one
              snl = zero
              csr = one
              snr = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           ! check if b is singular
           else if( abs( b( 1_${ik}$, 1_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_dlartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
              csr = one
              snr = zero
              call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
              call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 1_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           else if( abs( b( 2_${ik}$, 2_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_dlartg( a( 2_${ik}$, 2_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
              snr = -snr
              call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              csl = one
              snl = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 2_${ik}$ ) = zero
              wi = zero
           else
              ! b is nonsingular, first compute the eigenvalues of (a,b)
              call stdlib${ii}$_dlag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi )
              if( wi==zero ) then
                 ! two real eigenvalues, compute s*a-w*b
                 h1 = scale1*a( 1_${ik}$, 1_${ik}$ ) - wr1*b( 1_${ik}$, 1_${ik}$ )
                 h2 = scale1*a( 1_${ik}$, 2_${ik}$ ) - wr1*b( 1_${ik}$, 2_${ik}$ )
                 h3 = scale1*a( 2_${ik}$, 2_${ik}$ ) - wr1*b( 2_${ik}$, 2_${ik}$ )
                 rr = stdlib${ii}$_dlapy2( h1, h2 )
                 qq = stdlib${ii}$_dlapy2( scale1*a( 2_${ik}$, 1_${ik}$ ), h3 )
                 if( rr>qq ) then
                    ! find right rotation matrix to zero 1,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_dlartg( h2, h1, csr, snr, t )
                 else
                    ! find right rotation matrix to zero 2,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_dlartg( h3, scale1*a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
                 end if
                 snr = -snr
                 call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 ! compute inf norms of a and b
                 h1 = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 1_${ik}$, 2_${ik}$ ) ),abs( a( 2_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 h2 = max( abs( b( 1_${ik}$, 1_${ik}$ ) )+abs( b( 1_${ik}$, 2_${ik}$ ) ),abs( b( 2_${ik}$, 1_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 if( ( scale1*h1 )>=abs( wr1 )*h2 ) then
                    ! find left rotation matrix q to zero out b(2,1)
                    call stdlib${ii}$_dlartg( b( 1_${ik}$, 1_${ik}$ ), b( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 else
                    ! find left rotation matrix q to zero out a(2,1)
                    call stdlib${ii}$_dlartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 end if
                 call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 a( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 2_${ik}$, 1_${ik}$ ) = zero
              else
                 ! a pair of complex conjugate eigenvalues
                 ! first compute the svd of the matrix b
                 call stdlib${ii}$_dlasv2( b( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 2_${ik}$ ), b( 2_${ik}$, 2_${ik}$ ), r, t, snr,csr, snl, csl )
                           
                 ! form (a,b) := q(a,b)z**t where q is left rotation matrix and
                 ! z is right rotation matrix computed from stdlib${ii}$_dlasv2
                 call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 call stdlib${ii}$_drot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_drot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 b( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 1_${ik}$, 2_${ik}$ ) = zero
              end if
           end if
           ! unscaling
           a( 1_${ik}$, 1_${ik}$ ) = anorm*a( 1_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = anorm*a( 2_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = anorm*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = anorm*a( 2_${ik}$, 2_${ik}$ )
           b( 1_${ik}$, 1_${ik}$ ) = bnorm*b( 1_${ik}$, 1_${ik}$ )
           b( 2_${ik}$, 1_${ik}$ ) = bnorm*b( 2_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bnorm*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bnorm*b( 2_${ik}$, 2_${ik}$ )
           if( wi==zero ) then
              alphar( 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ )
              alphar( 2_${ik}$ ) = a( 2_${ik}$, 2_${ik}$ )
              alphai( 1_${ik}$ ) = zero
              alphai( 2_${ik}$ ) = zero
              beta( 1_${ik}$ ) = b( 1_${ik}$, 1_${ik}$ )
              beta( 2_${ik}$ ) = b( 2_${ik}$, 2_${ik}$ )
           else
              alphar( 1_${ik}$ ) = anorm*wr1 / scale1 / bnorm
              alphai( 1_${ik}$ ) = anorm*wi / scale1 / bnorm
              alphar( 2_${ik}$ ) = alphar( 1_${ik}$ )
              alphai( 2_${ik}$ ) = -alphai( 1_${ik}$ )
              beta( 1_${ik}$ ) = one
              beta( 2_${ik}$ ) = one
           end if
           return
     end subroutine stdlib${ii}$_dlagv2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$lagv2( a, lda, b, ldb, alphar, alphai, beta, csl, snl,csr, snr )
     !! DLAGV2: computes the Generalized Schur factorization of a real 2-by-2
     !! matrix pencil (A,B) where B is upper triangular. This routine
     !! computes orthogonal (rotation) matrices given by CSL, SNL and CSR,
     !! SNR such that
     !! 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0
     !! types), then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],
     !! 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,
     !! then
     !! [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
     !! [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
     !! [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
     !! [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]
     !! where b11 >= b22 > 0.
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           integer(${ik}$), intent(in) :: lda, ldb
           real(${rk}$), intent(out) :: csl, csr, snl, snr
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*)
           real(${rk}$), intent(out) :: alphai(2_${ik}$), alphar(2_${ik}$), beta(2_${ik}$)
        ! =====================================================================
           
           ! Local Scalars 
           real(${rk}$) :: anorm, ascale, bnorm, bscale, h1, h2, h3, qq, r, rr, safmin, scale1, &
                     scale2, t, ulp, wi, wr1, wr2
           ! Intrinsic Functions 
           ! Executable Statements 
           safmin = stdlib${ii}$_${ri}$lamch( 'S' )
           ulp = stdlib${ii}$_${ri}$lamch( 'P' )
           ! scale a
           anorm = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 1_${ik}$ ) ),abs( a( 1_${ik}$, 2_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ), &
                     safmin )
           ascale = one / anorm
           a( 1_${ik}$, 1_${ik}$ ) = ascale*a( 1_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = ascale*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = ascale*a( 2_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = ascale*a( 2_${ik}$, 2_${ik}$ )
           ! scale b
           bnorm = max( abs( b( 1_${ik}$, 1_${ik}$ ) ), abs( b( 1_${ik}$, 2_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ),safmin )
           bscale = one / bnorm
           b( 1_${ik}$, 1_${ik}$ ) = bscale*b( 1_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bscale*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bscale*b( 2_${ik}$, 2_${ik}$ )
           ! check if a can be deflated
           if( abs( a( 2_${ik}$, 1_${ik}$ ) )<=ulp ) then
              csl = one
              snl = zero
              csr = one
              snr = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           ! check if b is singular
           else if( abs( b( 1_${ik}$, 1_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_${ri}$lartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
              csr = one
              snr = zero
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 1_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              wi = zero
           else if( abs( b( 2_${ik}$, 2_${ik}$ ) )<=ulp ) then
              call stdlib${ii}$_${ri}$lartg( a( 2_${ik}$, 2_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
              snr = -snr
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
              csl = one
              snl = zero
              a( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 1_${ik}$ ) = zero
              b( 2_${ik}$, 2_${ik}$ ) = zero
              wi = zero
           else
              ! b is nonsingular, first compute the eigenvalues of (a,b)
              call stdlib${ii}$_${ri}$lag2( a, lda, b, ldb, safmin, scale1, scale2, wr1, wr2,wi )
              if( wi==zero ) then
                 ! two real eigenvalues, compute s*a-w*b
                 h1 = scale1*a( 1_${ik}$, 1_${ik}$ ) - wr1*b( 1_${ik}$, 1_${ik}$ )
                 h2 = scale1*a( 1_${ik}$, 2_${ik}$ ) - wr1*b( 1_${ik}$, 2_${ik}$ )
                 h3 = scale1*a( 2_${ik}$, 2_${ik}$ ) - wr1*b( 2_${ik}$, 2_${ik}$ )
                 rr = stdlib${ii}$_${ri}$lapy2( h1, h2 )
                 qq = stdlib${ii}$_${ri}$lapy2( scale1*a( 2_${ik}$, 1_${ik}$ ), h3 )
                 if( rr>qq ) then
                    ! find right rotation matrix to zero 1,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_${ri}$lartg( h2, h1, csr, snr, t )
                 else
                    ! find right rotation matrix to zero 2,1 element of
                    ! (sa - wb)
                    call stdlib${ii}$_${ri}$lartg( h3, scale1*a( 2_${ik}$, 1_${ik}$ ), csr, snr, t )
                 end if
                 snr = -snr
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 ! compute inf norms of a and b
                 h1 = max( abs( a( 1_${ik}$, 1_${ik}$ ) )+abs( a( 1_${ik}$, 2_${ik}$ ) ),abs( a( 2_${ik}$, 1_${ik}$ ) )+abs( a( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 h2 = max( abs( b( 1_${ik}$, 1_${ik}$ ) )+abs( b( 1_${ik}$, 2_${ik}$ ) ),abs( b( 2_${ik}$, 1_${ik}$ ) )+abs( b( 2_${ik}$, 2_${ik}$ ) ) )
                           
                 if( ( scale1*h1 )>=abs( wr1 )*h2 ) then
                    ! find left rotation matrix q to zero out b(2,1)
                    call stdlib${ii}$_${ri}$lartg( b( 1_${ik}$, 1_${ik}$ ), b( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 else
                    ! find left rotation matrix q to zero out a(2,1)
                    call stdlib${ii}$_${ri}$lartg( a( 1_${ik}$, 1_${ik}$ ), a( 2_${ik}$, 1_${ik}$ ), csl, snl, r )
                 end if
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 a( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 2_${ik}$, 1_${ik}$ ) = zero
              else
                 ! a pair of complex conjugate eigenvalues
                 ! first compute the svd of the matrix b
                 call stdlib${ii}$_${ri}$lasv2( b( 1_${ik}$, 1_${ik}$ ), b( 1_${ik}$, 2_${ik}$ ), b( 2_${ik}$, 2_${ik}$ ), r, t, snr,csr, snl, csl )
                           
                 ! form (a,b) := q(a,b)z**t where q is left rotation matrix and
                 ! z is right rotation matrix computed from stdlib${ii}$_${ri}$lasv2
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), lda, a( 2_${ik}$, 1_${ik}$ ), lda, csl, snl )
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), ldb, b( 2_${ik}$, 1_${ik}$ ), ldb, csl, snl )
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, a( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, a( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 call stdlib${ii}$_${ri}$rot( 2_${ik}$, b( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, b( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, csr, snr )
                 b( 2_${ik}$, 1_${ik}$ ) = zero
                 b( 1_${ik}$, 2_${ik}$ ) = zero
              end if
           end if
           ! unscaling
           a( 1_${ik}$, 1_${ik}$ ) = anorm*a( 1_${ik}$, 1_${ik}$ )
           a( 2_${ik}$, 1_${ik}$ ) = anorm*a( 2_${ik}$, 1_${ik}$ )
           a( 1_${ik}$, 2_${ik}$ ) = anorm*a( 1_${ik}$, 2_${ik}$ )
           a( 2_${ik}$, 2_${ik}$ ) = anorm*a( 2_${ik}$, 2_${ik}$ )
           b( 1_${ik}$, 1_${ik}$ ) = bnorm*b( 1_${ik}$, 1_${ik}$ )
           b( 2_${ik}$, 1_${ik}$ ) = bnorm*b( 2_${ik}$, 1_${ik}$ )
           b( 1_${ik}$, 2_${ik}$ ) = bnorm*b( 1_${ik}$, 2_${ik}$ )
           b( 2_${ik}$, 2_${ik}$ ) = bnorm*b( 2_${ik}$, 2_${ik}$ )
           if( wi==zero ) then
              alphar( 1_${ik}$ ) = a( 1_${ik}$, 1_${ik}$ )
              alphar( 2_${ik}$ ) = a( 2_${ik}$, 2_${ik}$ )
              alphai( 1_${ik}$ ) = zero
              alphai( 2_${ik}$ ) = zero
              beta( 1_${ik}$ ) = b( 1_${ik}$, 1_${ik}$ )
              beta( 2_${ik}$ ) = b( 2_${ik}$, 2_${ik}$ )
           else
              alphar( 1_${ik}$ ) = anorm*wr1 / scale1 / bnorm
              alphai( 1_${ik}$ ) = anorm*wi / scale1 / bnorm
              alphar( 2_${ik}$ ) = alphar( 1_${ik}$ )
              alphai( 2_${ik}$ ) = -alphai( 1_${ik}$ )
              beta( 1_${ik}$ ) = one
              beta( 2_${ik}$ ) = one
           end if
           return
     end subroutine stdlib${ii}$_${ri}$lagv2

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! STGEVC computes some or all of the right and/or left eigenvectors of
     !! a pair of real matrices (S,P), where S is a quasi-triangular matrix
     !! and P is upper triangular.  Matrix pairs of this type are produced by
     !! the generalized Schur factorization of a matrix pair (A,B):
     !! A = Q*S*Z**T,  B = Q*P*Z**T
     !! as computed by SGGHRD + SHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal blocks of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the orthogonal factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(sp), intent(in) :: p(ldp,*), s(lds,*)
           real(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           real(sp), parameter :: safety = 1.0e+2_sp
           
           ! Local Scalars 
           logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, &
                     lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, &
                     na, nw
           real(sp) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, &
           bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, &
                     salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale
           ! Local Arrays 
           real(sp) :: bdiag(2_${ik}$), sum(2_${ik}$,2_${ik}$), sums(2_${ik}$,2_${ik}$), sump(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
              ilall = .true.
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGEVC', -info )
              return
           end if
           ! count the number of eigenvectors to be computed
           if( .not.ilall ) then
              im = 0_${ik}$
              ilcplx = .false.
              loop_10: do j = 1, n
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_10
                 end if
                 if( j<n ) then
                    if( s( j+1, j )/=zero )ilcplx = .true.
                 end if
                 if( ilcplx ) then
                    if( select( j ) .or. select( j+1 ) )im = im + 2_${ik}$
                 else
                    if( select( j ) )im = im + 1_${ik}$
                 end if
              end do loop_10
           else
              im = n
           end if
           ! check 2-by-2 diagonal blocks of a, b
           ilabad = .false.
           ilbbad = .false.
           do j = 1, n - 1
              if( s( j+1, j )/=zero ) then
                 if( p( j, j )==zero .or. p( j+1, j+1 )==zero .or.p( j, j+1 )/=zero )ilbbad = &
                           .true.
                 if( j<n-1 ) then
                    if( s( j+2, j+1 )/=zero )ilabad = .true.
                 end if
              end if
           end do
           if( ilabad ) then
              info = -5_${ik}$
           else if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_slabad( safmin, big )
           ulp = stdlib${ii}$_slamch( 'EPSILON' )*stdlib${ii}$_slamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part (i.e., excluding all elements belonging to the diagonal
           ! blocks) of a and b to check for possible overflow in the
           ! triangular solver.
           anorm = abs( s( 1_${ik}$, 1_${ik}$ ) )
           if( n>1_${ik}$ )anorm = anorm + abs( s( 2_${ik}$, 1_${ik}$ ) )
           bnorm = abs( p( 1_${ik}$, 1_${ik}$ ) )
           work( 1_${ik}$ ) = zero
           work( n+1 ) = zero
           do j = 2, n
              temp = zero
              temp2 = zero
              if( s( j, j-1 )==zero ) then
                 iend = j - 1_${ik}$
              else
                 iend = j - 2_${ik}$
              end if
              do i = 1, iend
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              work( j ) = temp
              work( n+j ) = temp2
              do i = iend + 1, min( j+1, n )
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              anorm = max( anorm, temp )
              bnorm = max( bnorm, temp2 )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_220: do je = 1, n
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at.
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_220
                 end if
                 nw = 1_${ik}$
                 if( je<n ) then
                    if( s( je+1, je )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je+1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_220
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       ieig = ieig + 1_${ik}$
                       do jr = 1, n
                          vl( jr, ieig ) = zero
                       end do
                       vl( ieig, ieig ) = one
                       cycle loop_220
                    end if
                 end if
                 ! clear vector
                 do jr = 1, nw*n
                    work( 2_${ik}$*n+jr ) = zero
                 end do
                                                       ! t
                 ! compute coefficients in  ( a a - b b )  y = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_slag2( s( je, je ), lds, p( je, je ), ldp,safmin*safety, acoef, &
                              temp, bcoefr, temp2,bcoefi )
                    bcoefi = -bcoefi
                    if( bcoefi==zero ) then
                       info = je
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    temp = acoef*s( je+1, je )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je+1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je+1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je+1 ) = one
                       work( 3_${ik}$*n+je+1 ) = zero
                       temp = acoef*s( je, je+1 )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je+1, je+1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je+1 ) &
                              )+abs( work( 3_${ik}$*n+je+1 ) ) )
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                 ! t
                 ! triangular solve of  (a a - b b)  y = 0
                                         ! t
                 ! (rowwise in  (a a - b b) , or columnwise in (a a - b b) )
                 il2by2 = .false.
                 loop_160: do j = je + nw, n
                    if( il2by2 ) then
                       il2by2 = .false.
                       cycle loop_160
                    end if
                    na = 1_${ik}$
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( j<n ) then
                       if( s( j+1, j )/=zero ) then
                          il2by2 = .true.
                          bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                          na = 2_${ik}$
                       end if
                    end if
                    ! check whether scaling is necessary for dot products
                    xscale = one / max( one, xmax )
                    temp = max( work( j ), work( n+j ),acoefa*work( j )+bcoefa*work( n+j ) )
                              
                    if( il2by2 )temp = max( temp, work( j+1 ), work( n+j+1 ),acoefa*work( j+1 )+&
                              bcoefa*work( n+j+1 ) )
                    if( temp>bignum*xscale ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = xmax*xscale
                    end if
                    ! compute dot products
                          ! j-1
                    ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                          ! k=je
                    ! to reduce the op count, this is done as
                    ! _        j-1                  _        j-1
                    ! a*conjg( sum  s(k,j)*x(k) ) - b*conjg( sum  p(k,j)*x(k) )
                             ! k=je                          k=je
                    ! which may cause underflow problems if a or b are close
                    ! to underflow.  (e.g., less than small.)
                    do jw = 1, nw
                       do ja = 1, na
                          sums( ja, jw ) = zero
                          sump( ja, jw ) = zero
                          do jr = je, j - 1
                             sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                             sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                          end do
                       end do
                    end do
                    do ja = 1, na
                       if( ilcplx ) then
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ ) -bcoefi*sump( &
                                    ja, 2_${ik}$ )
                          sum( ja, 2_${ik}$ ) = -acoef*sums( ja, 2_${ik}$ ) +bcoefr*sump( ja, 2_${ik}$ ) +bcoefi*sump( &
                                    ja, 1_${ik}$ )
                       else
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ )
                       end if
                    end do
                                        ! t
                    ! solve  ( a a - b b )  y = sum(,)
                    ! with scaling and perturbation of the denominator
                    call stdlib${ii}$_slaln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), sum, 2_${ik}$, bcoefr,bcoefi, work( 2_${ik}$*n+j ), n, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = scale*xmax
                    end if
                    xmax = max( xmax, temp )
                 end do loop_160
                 ! copy eigenvector to vl, back transforming if
                 ! howmny='b'.
                 ieig = ieig + 1_${ik}$
                 if( ilback ) then
                    do jw = 0, nw - 1
                       call stdlib${ii}$_sgemv( 'N', n, n+1-je, one, vl( 1_${ik}$, je ), ldvl,work( ( jw+2 )*n+&
                                 je ), 1_${ik}$, zero,work( ( jw+4 )*n+1 ), 1_${ik}$ )
                    end do
                    call stdlib${ii}$_slacpy( ' ', n, nw, work( 4_${ik}$*n+1 ), n, vl( 1_${ik}$, je ),ldvl )
                    ibeg = 1_${ik}$
                 else
                    call stdlib${ii}$_slacpy( ' ', n, nw, work( 2_${ik}$*n+1 ), n, vl( 1_${ik}$, ieig ),ldvl )
                    ibeg = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) )+abs( vl( j, ieig+1 ) ) )
                    end do
                 else
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = ibeg, n
                          vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw )
                       end do
                    end do
                 end if
                 ieig = ieig + nw - 1_${ik}$
              end do loop_220
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_500: do je = n, 1, -1
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at -- if complex, select(je)
                 ! or select(je-1).
                 ! if this is a complex pair, the 2-by-2 diagonal block
                 ! corresponding to the eigenvalue is in rows/columns je-1:je
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_500
                 end if
                 nw = 1_${ik}$
                 if( je>1_${ik}$ ) then
                    if( s( je, je-1 )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je-1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_500
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- unit eigenvector
                       ieig = ieig - 1_${ik}$
                       do jr = 1, n
                          vr( jr, ieig ) = zero
                       end do
                       vr( ieig, ieig ) = one
                       cycle loop_500
                    end if
                 end if
                 ! clear vector
                 do jw = 0, nw - 1
                    do jr = 1, n
                       work( ( jw+2 )*n+jr ) = zero
                    end do
                 end do
                 ! compute coefficients in  ( a a - b b ) x = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                    ! compute contribution from column je of a and b to sum
                    ! (see "further details", above.)
                    do jr = 1, je - 1
                       work( 2_${ik}$*n+jr ) = bcoefr*p( jr, je ) -acoef*s( jr, je )
                    end do
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_slag2( s( je-1, je-1 ), lds, p( je-1, je-1 ), ldp,safmin*safety, &
                              acoef, temp, bcoefr, temp2,bcoefi )
                    if( bcoefi==zero ) then
                       info = je - 1_${ik}$
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    ! and contribution to sums
                    temp = acoef*s( je, je-1 )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je-1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je-1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je-1 ) = one
                       work( 3_${ik}$*n+je-1 ) = zero
                       temp = acoef*s( je-1, je )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je-1, je-1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je-1 ) &
                              )+abs( work( 3_${ik}$*n+je-1 ) ) )
                    ! compute contribution from columns je and je-1
                    ! of a and b to the sums.
                    creala = acoef*work( 2_${ik}$*n+je-1 )
                    cimaga = acoef*work( 3_${ik}$*n+je-1 )
                    crealb = bcoefr*work( 2_${ik}$*n+je-1 ) -bcoefi*work( 3_${ik}$*n+je-1 )
                    cimagb = bcoefi*work( 2_${ik}$*n+je-1 ) +bcoefr*work( 3_${ik}$*n+je-1 )
                    cre2a = acoef*work( 2_${ik}$*n+je )
                    cim2a = acoef*work( 3_${ik}$*n+je )
                    cre2b = bcoefr*work( 2_${ik}$*n+je ) - bcoefi*work( 3_${ik}$*n+je )
                    cim2b = bcoefi*work( 2_${ik}$*n+je ) + bcoefr*work( 3_${ik}$*n+je )
                    do jr = 1, je - 2
                       work( 2_${ik}$*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, &
                                 je ) + cre2b*p( jr, je )
                       work( 3_${ik}$*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, &
                                 je ) + cim2b*p( jr, je )
                    end do
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                 ! columnwise triangular solve of  (a a - b b)  x = 0
                 il2by2 = .false.
                 loop_370: do j = je - nw, 1, -1
                    ! if a 2-by-2 block, is in position j-1:j, wait until
                    ! next iteration to process it (when it will be j:j+1)
                    if( .not.il2by2 .and. j>1_${ik}$ ) then
                       if( s( j, j-1 )/=zero ) then
                          il2by2 = .true.
                          cycle loop_370
                       end if
                    end if
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( il2by2 ) then
                       na = 2_${ik}$
                       bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                    else
                       na = 1_${ik}$
                    end if
                    ! compute x(j) (and x(j+1), if 2-by-2 block)
                    call stdlib${ii}$_slaln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), work( 2_${ik}$*n+j ),n, bcoefr, bcoefi, sum, 2_${ik}$, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = 1, je
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                    end if
                    xmax = max( scale*xmax, temp )
                    do jw = 1, nw
                       do ja = 1, na
                          work( ( jw+1 )*n+j+ja-1 ) = sum( ja, jw )
                       end do
                    end do
                    ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                    if( j>1_${ik}$ ) then
                       ! check whether scaling is necessary for sum.
                       xscale = one / max( one, xmax )
                       temp = acoefa*work( j ) + bcoefa*work( n+j )
                       if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) )
                                 
                       temp = max( temp, acoefa, bcoefa )
                       if( temp>bignum*xscale ) then
                          do jw = 0, nw - 1
                             do jr = 1, je
                                work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                             end do
                          end do
                          xmax = xmax*xscale
                       end if
                       ! compute the contributions of the off-diagonals of
                       ! column j (and j+1, if 2-by-2 block) of a and b to the
                       ! sums.
                       do ja = 1, na
                          if( ilcplx ) then
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             cimaga = acoef*work( 3_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 ) -bcoefi*work( 3_${ik}$*n+j+ja-1 )
                             cimagb = bcoefi*work( 2_${ik}$*n+j+ja-1 ) +bcoefr*work( 3_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                                work( 3_${ik}$*n+jr ) = work( 3_${ik}$*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(&
                                           jr, j+ja-1 )
                             end do
                          else
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                             end do
                          end if
                       end do
                    end if
                    il2by2 = .false.
                 end do loop_370
                 ! copy eigenvector to vr, back transforming if
                 ! howmny='b'.
                 ieig = ieig - nw
                 if( ilback ) then
                    do jw = 0, nw - 1
                       do jr = 1, n
                          work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1_${ik}$ )
                       end do
                       ! a series of compiler directives to defeat
                       ! vectorization for the next loop
                       do jc = 2, je
                          do jr = 1, n
                             work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )&
                                       *vr( jr, jc )
                          end do
                       end do
                    end do
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr )
                       end do
                    end do
                    iend = n
                 else
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr )
                       end do
                    end do
                    iend = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) )
                    end do
                 else
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = 1, iend
                          vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw )
                       end do
                    end do
                 end if
              end do loop_500
           end if
           return
     end subroutine stdlib${ii}$_stgevc

     pure module subroutine stdlib${ii}$_dtgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! DTGEVC computes some or all of the right and/or left eigenvectors of
     !! a pair of real matrices (S,P), where S is a quasi-triangular matrix
     !! and P is upper triangular.  Matrix pairs of this type are produced by
     !! the generalized Schur factorization of a matrix pair (A,B):
     !! A = Q*S*Z**T,  B = Q*P*Z**T
     !! as computed by DGGHRD + DHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal blocks of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the orthogonal factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(dp), intent(in) :: p(ldp,*), s(lds,*)
           real(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           real(dp), parameter :: safety = 1.0e+2_dp
           
           ! Local Scalars 
           logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, &
                     lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, &
                     na, nw
           real(dp) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, &
           bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, &
                     salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale
           ! Local Arrays 
           real(dp) :: bdiag(2_${ik}$), sum(2_${ik}$,2_${ik}$), sums(2_${ik}$,2_${ik}$), sump(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
              ilall = .true.
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEVC', -info )
              return
           end if
           ! count the number of eigenvectors to be computed
           if( .not.ilall ) then
              im = 0_${ik}$
              ilcplx = .false.
              loop_10: do j = 1, n
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_10
                 end if
                 if( j<n ) then
                    if( s( j+1, j )/=zero )ilcplx = .true.
                 end if
                 if( ilcplx ) then
                    if( select( j ) .or. select( j+1 ) )im = im + 2_${ik}$
                 else
                    if( select( j ) )im = im + 1_${ik}$
                 end if
              end do loop_10
           else
              im = n
           end if
           ! check 2-by-2 diagonal blocks of a, b
           ilabad = .false.
           ilbbad = .false.
           do j = 1, n - 1
              if( s( j+1, j )/=zero ) then
                 if( p( j, j )==zero .or. p( j+1, j+1 )==zero .or.p( j, j+1 )/=zero )ilbbad = &
                           .true.
                 if( j<n-1 ) then
                    if( s( j+2, j+1 )/=zero )ilabad = .true.
                 end if
              end if
           end do
           if( ilabad ) then
              info = -5_${ik}$
           else if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_dlabad( safmin, big )
           ulp = stdlib${ii}$_dlamch( 'EPSILON' )*stdlib${ii}$_dlamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part (i.e., excluding all elements belonging to the diagonal
           ! blocks) of a and b to check for possible overflow in the
           ! triangular solver.
           anorm = abs( s( 1_${ik}$, 1_${ik}$ ) )
           if( n>1_${ik}$ )anorm = anorm + abs( s( 2_${ik}$, 1_${ik}$ ) )
           bnorm = abs( p( 1_${ik}$, 1_${ik}$ ) )
           work( 1_${ik}$ ) = zero
           work( n+1 ) = zero
           do j = 2, n
              temp = zero
              temp2 = zero
              if( s( j, j-1 )==zero ) then
                 iend = j - 1_${ik}$
              else
                 iend = j - 2_${ik}$
              end if
              do i = 1, iend
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              work( j ) = temp
              work( n+j ) = temp2
              do i = iend + 1, min( j+1, n )
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              anorm = max( anorm, temp )
              bnorm = max( bnorm, temp2 )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_220: do je = 1, n
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at.
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_220
                 end if
                 nw = 1_${ik}$
                 if( je<n ) then
                    if( s( je+1, je )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je+1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_220
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       ieig = ieig + 1_${ik}$
                       do jr = 1, n
                          vl( jr, ieig ) = zero
                       end do
                       vl( ieig, ieig ) = one
                       cycle loop_220
                    end if
                 end if
                 ! clear vector
                 do jr = 1, nw*n
                    work( 2_${ik}$*n+jr ) = zero
                 end do
                                                       ! t
                 ! compute coefficients in  ( a a - b b )  y = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_dlag2( s( je, je ), lds, p( je, je ), ldp,safmin*safety, acoef, &
                              temp, bcoefr, temp2,bcoefi )
                    bcoefi = -bcoefi
                    if( bcoefi==zero ) then
                       info = je
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    temp = acoef*s( je+1, je )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je+1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je+1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je+1 ) = one
                       work( 3_${ik}$*n+je+1 ) = zero
                       temp = acoef*s( je, je+1 )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je+1, je+1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je+1 ) &
                              )+abs( work( 3_${ik}$*n+je+1 ) ) )
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                 ! t
                 ! triangular solve of  (a a - b b)  y = 0
                                         ! t
                 ! (rowwise in  (a a - b b) , or columnwise in (a a - b b) )
                 il2by2 = .false.
                 loop_160: do j = je + nw, n
                    if( il2by2 ) then
                       il2by2 = .false.
                       cycle loop_160
                    end if
                    na = 1_${ik}$
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( j<n ) then
                       if( s( j+1, j )/=zero ) then
                          il2by2 = .true.
                          bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                          na = 2_${ik}$
                       end if
                    end if
                    ! check whether scaling is necessary for dot products
                    xscale = one / max( one, xmax )
                    temp = max( work( j ), work( n+j ),acoefa*work( j )+bcoefa*work( n+j ) )
                              
                    if( il2by2 )temp = max( temp, work( j+1 ), work( n+j+1 ),acoefa*work( j+1 )+&
                              bcoefa*work( n+j+1 ) )
                    if( temp>bignum*xscale ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = xmax*xscale
                    end if
                    ! compute dot products
                          ! j-1
                    ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                          ! k=je
                    ! to reduce the op count, this is done as
                    ! _        j-1                  _        j-1
                    ! a*conjg( sum  s(k,j)*x(k) ) - b*conjg( sum  p(k,j)*x(k) )
                             ! k=je                          k=je
                    ! which may cause underflow problems if a or b are close
                    ! to underflow.  (e.g., less than small.)
                    do jw = 1, nw
                       do ja = 1, na
                          sums( ja, jw ) = zero
                          sump( ja, jw ) = zero
                          do jr = je, j - 1
                             sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                             sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                          end do
                       end do
                    end do
                    do ja = 1, na
                       if( ilcplx ) then
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ ) -bcoefi*sump( &
                                    ja, 2_${ik}$ )
                          sum( ja, 2_${ik}$ ) = -acoef*sums( ja, 2_${ik}$ ) +bcoefr*sump( ja, 2_${ik}$ ) +bcoefi*sump( &
                                    ja, 1_${ik}$ )
                       else
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ )
                       end if
                    end do
                                        ! t
                    ! solve  ( a a - b b )  y = sum(,)
                    ! with scaling and perturbation of the denominator
                    call stdlib${ii}$_dlaln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), sum, 2_${ik}$, bcoefr,bcoefi, work( 2_${ik}$*n+j ), n, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = scale*xmax
                    end if
                    xmax = max( xmax, temp )
                 end do loop_160
                 ! copy eigenvector to vl, back transforming if
                 ! howmny='b'.
                 ieig = ieig + 1_${ik}$
                 if( ilback ) then
                    do jw = 0, nw - 1
                       call stdlib${ii}$_dgemv( 'N', n, n+1-je, one, vl( 1_${ik}$, je ), ldvl,work( ( jw+2 )*n+&
                                 je ), 1_${ik}$, zero,work( ( jw+4 )*n+1 ), 1_${ik}$ )
                    end do
                    call stdlib${ii}$_dlacpy( ' ', n, nw, work( 4_${ik}$*n+1 ), n, vl( 1_${ik}$, je ),ldvl )
                    ibeg = 1_${ik}$
                 else
                    call stdlib${ii}$_dlacpy( ' ', n, nw, work( 2_${ik}$*n+1 ), n, vl( 1_${ik}$, ieig ),ldvl )
                    ibeg = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) )+abs( vl( j, ieig+1 ) ) )
                    end do
                 else
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = ibeg, n
                          vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw )
                       end do
                    end do
                 end if
                 ieig = ieig + nw - 1_${ik}$
              end do loop_220
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_500: do je = n, 1, -1
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at -- if complex, select(je)
                 ! or select(je-1).
                 ! if this is a complex pair, the 2-by-2 diagonal block
                 ! corresponding to the eigenvalue is in rows/columns je-1:je
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_500
                 end if
                 nw = 1_${ik}$
                 if( je>1_${ik}$ ) then
                    if( s( je, je-1 )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je-1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_500
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- unit eigenvector
                       ieig = ieig - 1_${ik}$
                       do jr = 1, n
                          vr( jr, ieig ) = zero
                       end do
                       vr( ieig, ieig ) = one
                       cycle loop_500
                    end if
                 end if
                 ! clear vector
                 do jw = 0, nw - 1
                    do jr = 1, n
                       work( ( jw+2 )*n+jr ) = zero
                    end do
                 end do
                 ! compute coefficients in  ( a a - b b ) x = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                    ! compute contribution from column je of a and b to sum
                    ! (see "further details", above.)
                    do jr = 1, je - 1
                       work( 2_${ik}$*n+jr ) = bcoefr*p( jr, je ) -acoef*s( jr, je )
                    end do
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_dlag2( s( je-1, je-1 ), lds, p( je-1, je-1 ), ldp,safmin*safety, &
                              acoef, temp, bcoefr, temp2,bcoefi )
                    if( bcoefi==zero ) then
                       info = je - 1_${ik}$
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    ! and contribution to sums
                    temp = acoef*s( je, je-1 )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je-1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je-1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je-1 ) = one
                       work( 3_${ik}$*n+je-1 ) = zero
                       temp = acoef*s( je-1, je )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je-1, je-1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je-1 ) &
                              )+abs( work( 3_${ik}$*n+je-1 ) ) )
                    ! compute contribution from columns je and je-1
                    ! of a and b to the sums.
                    creala = acoef*work( 2_${ik}$*n+je-1 )
                    cimaga = acoef*work( 3_${ik}$*n+je-1 )
                    crealb = bcoefr*work( 2_${ik}$*n+je-1 ) -bcoefi*work( 3_${ik}$*n+je-1 )
                    cimagb = bcoefi*work( 2_${ik}$*n+je-1 ) +bcoefr*work( 3_${ik}$*n+je-1 )
                    cre2a = acoef*work( 2_${ik}$*n+je )
                    cim2a = acoef*work( 3_${ik}$*n+je )
                    cre2b = bcoefr*work( 2_${ik}$*n+je ) - bcoefi*work( 3_${ik}$*n+je )
                    cim2b = bcoefi*work( 2_${ik}$*n+je ) + bcoefr*work( 3_${ik}$*n+je )
                    do jr = 1, je - 2
                       work( 2_${ik}$*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, &
                                 je ) + cre2b*p( jr, je )
                       work( 3_${ik}$*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, &
                                 je ) + cim2b*p( jr, je )
                    end do
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                 ! columnwise triangular solve of  (a a - b b)  x = 0
                 il2by2 = .false.
                 loop_370: do j = je - nw, 1, -1
                    ! if a 2-by-2 block, is in position j-1:j, wait until
                    ! next iteration to process it (when it will be j:j+1)
                    if( .not.il2by2 .and. j>1_${ik}$ ) then
                       if( s( j, j-1 )/=zero ) then
                          il2by2 = .true.
                          cycle loop_370
                       end if
                    end if
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( il2by2 ) then
                       na = 2_${ik}$
                       bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                    else
                       na = 1_${ik}$
                    end if
                    ! compute x(j) (and x(j+1), if 2-by-2 block)
                    call stdlib${ii}$_dlaln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), work( 2_${ik}$*n+j ),n, bcoefr, bcoefi, sum, 2_${ik}$, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = 1, je
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                    end if
                    xmax = max( scale*xmax, temp )
                    do jw = 1, nw
                       do ja = 1, na
                          work( ( jw+1 )*n+j+ja-1 ) = sum( ja, jw )
                       end do
                    end do
                    ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                    if( j>1_${ik}$ ) then
                       ! check whether scaling is necessary for sum.
                       xscale = one / max( one, xmax )
                       temp = acoefa*work( j ) + bcoefa*work( n+j )
                       if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) )
                                 
                       temp = max( temp, acoefa, bcoefa )
                       if( temp>bignum*xscale ) then
                          do jw = 0, nw - 1
                             do jr = 1, je
                                work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                             end do
                          end do
                          xmax = xmax*xscale
                       end if
                       ! compute the contributions of the off-diagonals of
                       ! column j (and j+1, if 2-by-2 block) of a and b to the
                       ! sums.
                       do ja = 1, na
                          if( ilcplx ) then
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             cimaga = acoef*work( 3_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 ) -bcoefi*work( 3_${ik}$*n+j+ja-1 )
                             cimagb = bcoefi*work( 2_${ik}$*n+j+ja-1 ) +bcoefr*work( 3_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                                work( 3_${ik}$*n+jr ) = work( 3_${ik}$*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(&
                                           jr, j+ja-1 )
                             end do
                          else
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                             end do
                          end if
                       end do
                    end if
                    il2by2 = .false.
                 end do loop_370
                 ! copy eigenvector to vr, back transforming if
                 ! howmny='b'.
                 ieig = ieig - nw
                 if( ilback ) then
                    do jw = 0, nw - 1
                       do jr = 1, n
                          work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1_${ik}$ )
                       end do
                       ! a series of compiler directives to defeat
                       ! vectorization for the next loop
                       do jc = 2, je
                          do jr = 1, n
                             work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )&
                                       *vr( jr, jc )
                          end do
                       end do
                    end do
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr )
                       end do
                    end do
                    iend = n
                 else
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr )
                       end do
                    end do
                    iend = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) )
                    end do
                 else
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = 1, iend
                          vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw )
                       end do
                    end do
                 end if
              end do loop_500
           end if
           return
     end subroutine stdlib${ii}$_dtgevc

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! DTGEVC: computes some or all of the right and/or left eigenvectors of
     !! a pair of real matrices (S,P), where S is a quasi-triangular matrix
     !! and P is upper triangular.  Matrix pairs of this type are produced by
     !! the generalized Schur factorization of a matrix pair (A,B):
     !! A = Q*S*Z**T,  B = Q*P*Z**T
     !! as computed by DGGHRD + DHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal blocks of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the orthogonal factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(${rk}$), intent(in) :: p(ldp,*), s(lds,*)
           real(${rk}$), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           ! Parameters 
           real(${rk}$), parameter :: safety = 1.0e+2_${rk}$
           
           ! Local Scalars 
           logical(lk) :: compl, compr, il2by2, ilabad, ilall, ilback, ilbbad, ilcomp, ilcplx, &
                     lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, iinfo, im, iside, j, ja, jc, je, jr, jw, &
                     na, nw
           real(${rk}$) :: acoef, acoefa, anorm, ascale, bcoefa, bcoefi, bcoefr, big, bignum, bnorm, &
           bscale, cim2a, cim2b, cimaga, cimagb, cre2a, cre2b, creala, crealb, dmin, safmin, &
                     salfar, sbeta, scale, small, temp, temp2, temp2i, temp2r, ulp, xmax, xscale
           ! Local Arrays 
           real(${rk}$) :: bdiag(2_${ik}$), sum(2_${ik}$,2_${ik}$), sums(2_${ik}$,2_${ik}$), sump(2_${ik}$,2_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
              ilall = .true.
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEVC', -info )
              return
           end if
           ! count the number of eigenvectors to be computed
           if( .not.ilall ) then
              im = 0_${ik}$
              ilcplx = .false.
              loop_10: do j = 1, n
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_10
                 end if
                 if( j<n ) then
                    if( s( j+1, j )/=zero )ilcplx = .true.
                 end if
                 if( ilcplx ) then
                    if( select( j ) .or. select( j+1 ) )im = im + 2_${ik}$
                 else
                    if( select( j ) )im = im + 1_${ik}$
                 end if
              end do loop_10
           else
              im = n
           end if
           ! check 2-by-2 diagonal blocks of a, b
           ilabad = .false.
           ilbbad = .false.
           do j = 1, n - 1
              if( s( j+1, j )/=zero ) then
                 if( p( j, j )==zero .or. p( j+1, j+1 )==zero .or.p( j, j+1 )/=zero )ilbbad = &
                           .true.
                 if( j<n-1 ) then
                    if( s( j+2, j+1 )/=zero )ilabad = .true.
                 end if
              end if
           end do
           if( ilabad ) then
              info = -5_${ik}$
           else if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_${ri}$labad( safmin, big )
           ulp = stdlib${ii}$_${ri}$lamch( 'EPSILON' )*stdlib${ii}$_${ri}$lamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part (i.e., excluding all elements belonging to the diagonal
           ! blocks) of a and b to check for possible overflow in the
           ! triangular solver.
           anorm = abs( s( 1_${ik}$, 1_${ik}$ ) )
           if( n>1_${ik}$ )anorm = anorm + abs( s( 2_${ik}$, 1_${ik}$ ) )
           bnorm = abs( p( 1_${ik}$, 1_${ik}$ ) )
           work( 1_${ik}$ ) = zero
           work( n+1 ) = zero
           do j = 2, n
              temp = zero
              temp2 = zero
              if( s( j, j-1 )==zero ) then
                 iend = j - 1_${ik}$
              else
                 iend = j - 2_${ik}$
              end if
              do i = 1, iend
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              work( j ) = temp
              work( n+j ) = temp2
              do i = iend + 1, min( j+1, n )
                 temp = temp + abs( s( i, j ) )
                 temp2 = temp2 + abs( p( i, j ) )
              end do
              anorm = max( anorm, temp )
              bnorm = max( bnorm, temp2 )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_220: do je = 1, n
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at.
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_220
                 end if
                 nw = 1_${ik}$
                 if( je<n ) then
                    if( s( je+1, je )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je+1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_220
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       ieig = ieig + 1_${ik}$
                       do jr = 1, n
                          vl( jr, ieig ) = zero
                       end do
                       vl( ieig, ieig ) = one
                       cycle loop_220
                    end if
                 end if
                 ! clear vector
                 do jr = 1, nw*n
                    work( 2_${ik}$*n+jr ) = zero
                 end do
                                                       ! t
                 ! compute coefficients in  ( a a - b b )  y = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_${ri}$lag2( s( je, je ), lds, p( je, je ), ldp,safmin*safety, acoef, &
                              temp, bcoefr, temp2,bcoefi )
                    bcoefi = -bcoefi
                    if( bcoefi==zero ) then
                       info = je
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    temp = acoef*s( je+1, je )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je+1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je+1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je+1 ) = one
                       work( 3_${ik}$*n+je+1 ) = zero
                       temp = acoef*s( je, je+1 )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je+1, je+1 )-acoef*s( je+1, je+1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je+1, je+1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je+1 ) &
                              )+abs( work( 3_${ik}$*n+je+1 ) ) )
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                 ! t
                 ! triangular solve of  (a a - b b)  y = 0
                                         ! t
                 ! (rowwise in  (a a - b b) , or columnwise in (a a - b b) )
                 il2by2 = .false.
                 loop_160: do j = je + nw, n
                    if( il2by2 ) then
                       il2by2 = .false.
                       cycle loop_160
                    end if
                    na = 1_${ik}$
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( j<n ) then
                       if( s( j+1, j )/=zero ) then
                          il2by2 = .true.
                          bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                          na = 2_${ik}$
                       end if
                    end if
                    ! check whether scaling is necessary for dot products
                    xscale = one / max( one, xmax )
                    temp = max( work( j ), work( n+j ),acoefa*work( j )+bcoefa*work( n+j ) )
                              
                    if( il2by2 )temp = max( temp, work( j+1 ), work( n+j+1 ),acoefa*work( j+1 )+&
                              bcoefa*work( n+j+1 ) )
                    if( temp>bignum*xscale ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = xmax*xscale
                    end if
                    ! compute dot products
                          ! j-1
                    ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                          ! k=je
                    ! to reduce the op count, this is done as
                    ! _        j-1                  _        j-1
                    ! a*conjg( sum  s(k,j)*x(k) ) - b*conjg( sum  p(k,j)*x(k) )
                             ! k=je                          k=je
                    ! which may cause underflow problems if a or b are close
                    ! to underflow.  (e.g., less than small.)
                    do jw = 1, nw
                       do ja = 1, na
                          sums( ja, jw ) = zero
                          sump( ja, jw ) = zero
                          do jr = je, j - 1
                             sums( ja, jw ) = sums( ja, jw ) +s( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                             sump( ja, jw ) = sump( ja, jw ) +p( jr, j+ja-1 )*work( ( jw+1 )*n+jr &
                                       )
                          end do
                       end do
                    end do
                    do ja = 1, na
                       if( ilcplx ) then
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ ) -bcoefi*sump( &
                                    ja, 2_${ik}$ )
                          sum( ja, 2_${ik}$ ) = -acoef*sums( ja, 2_${ik}$ ) +bcoefr*sump( ja, 2_${ik}$ ) +bcoefi*sump( &
                                    ja, 1_${ik}$ )
                       else
                          sum( ja, 1_${ik}$ ) = -acoef*sums( ja, 1_${ik}$ ) +bcoefr*sump( ja, 1_${ik}$ )
                       end if
                    end do
                                        ! t
                    ! solve  ( a a - b b )  y = sum(,)
                    ! with scaling and perturbation of the denominator
                    call stdlib${ii}$_${ri}$laln2( .true., na, nw, dmin, acoef, s( j, j ), lds,bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), sum, 2_${ik}$, bcoefr,bcoefi, work( 2_${ik}$*n+j ), n, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = je, j - 1
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                       xmax = scale*xmax
                    end if
                    xmax = max( xmax, temp )
                 end do loop_160
                 ! copy eigenvector to vl, back transforming if
                 ! howmny='b'.
                 ieig = ieig + 1_${ik}$
                 if( ilback ) then
                    do jw = 0, nw - 1
                       call stdlib${ii}$_${ri}$gemv( 'N', n, n+1-je, one, vl( 1_${ik}$, je ), ldvl,work( ( jw+2 )*n+&
                                 je ), 1_${ik}$, zero,work( ( jw+4 )*n+1 ), 1_${ik}$ )
                    end do
                    call stdlib${ii}$_${ri}$lacpy( ' ', n, nw, work( 4_${ik}$*n+1 ), n, vl( 1_${ik}$, je ),ldvl )
                    ibeg = 1_${ik}$
                 else
                    call stdlib${ii}$_${ri}$lacpy( ' ', n, nw, work( 2_${ik}$*n+1 ), n, vl( 1_${ik}$, ieig ),ldvl )
                    ibeg = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) )+abs( vl( j, ieig+1 ) ) )
                    end do
                 else
                    do j = ibeg, n
                       xmax = max( xmax, abs( vl( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = ibeg, n
                          vl( jr, ieig+jw ) = xscale*vl( jr, ieig+jw )
                       end do
                    end do
                 end if
                 ieig = ieig + nw - 1_${ik}$
              end do loop_220
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              ilcplx = .false.
              loop_500: do je = n, 1, -1
                 ! skip this iteration if (a) howmny='s' and select=.false., or
                 ! (b) this would be the second of a complex pair.
                 ! check for complex eigenvalue, so as to be sure of which
                 ! entry(-ies) of select to look at -- if complex, select(je)
                 ! or select(je-1).
                 ! if this is a complex pair, the 2-by-2 diagonal block
                 ! corresponding to the eigenvalue is in rows/columns je-1:je
                 if( ilcplx ) then
                    ilcplx = .false.
                    cycle loop_500
                 end if
                 nw = 1_${ik}$
                 if( je>1_${ik}$ ) then
                    if( s( je, je-1 )/=zero ) then
                       ilcplx = .true.
                       nw = 2_${ik}$
                    end if
                 end if
                 if( ilall ) then
                    ilcomp = .true.
                 else if( ilcplx ) then
                    ilcomp = select( je ) .or. select( je-1 )
                 else
                    ilcomp = select( je )
                 end if
                 if( .not.ilcomp )cycle loop_500
                 ! decide if (a) singular pencil, (b) real eigenvalue, or
                 ! (c) complex eigenvalue.
                 if( .not.ilcplx ) then
                    if( abs( s( je, je ) )<=safmin .and.abs( p( je, je ) )<=safmin ) then
                       ! singular matrix pencil -- unit eigenvector
                       ieig = ieig - 1_${ik}$
                       do jr = 1, n
                          vr( jr, ieig ) = zero
                       end do
                       vr( ieig, ieig ) = one
                       cycle loop_500
                    end if
                 end if
                 ! clear vector
                 do jw = 0, nw - 1
                    do jr = 1, n
                       work( ( jw+2 )*n+jr ) = zero
                    end do
                 end do
                 ! compute coefficients in  ( a a - b b ) x = 0
                    ! a  is  acoef
                    ! b  is  bcoefr + i*bcoefi
                 if( .not.ilcplx ) then
                    ! real eigenvalue
                    temp = one / max( abs( s( je, je ) )*ascale,abs( p( je, je ) )*bscale, safmin &
                              )
                    salfar = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*p( je, je ) )*bscale
                    acoef = sbeta*ascale
                    bcoefr = salfar*bscale
                    bcoefi = zero
                    ! scale to avoid underflow
                    scale = one
                    lsa = abs( sbeta )>=safmin .and. abs( acoef )<small
                    lsb = abs( salfar )>=safmin .and. abs( bcoefr )<small
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs( salfar ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoef ),abs( bcoefr ) ) ) &
                                 )
                       if( lsa ) then
                          acoef = ascale*( scale*sbeta )
                       else
                          acoef = scale*acoef
                       end if
                       if( lsb ) then
                          bcoefr = bscale*( scale*salfar )
                       else
                          bcoefr = scale*bcoefr
                       end if
                    end if
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr )
                    ! first component is 1
                    work( 2_${ik}$*n+je ) = one
                    xmax = one
                    ! compute contribution from column je of a and b to sum
                    ! (see "further details", above.)
                    do jr = 1, je - 1
                       work( 2_${ik}$*n+jr ) = bcoefr*p( jr, je ) -acoef*s( jr, je )
                    end do
                 else
                    ! complex eigenvalue
                    call stdlib${ii}$_${ri}$lag2( s( je-1, je-1 ), lds, p( je-1, je-1 ), ldp,safmin*safety, &
                              acoef, temp, bcoefr, temp2,bcoefi )
                    if( bcoefi==zero ) then
                       info = je - 1_${ik}$
                       return
                    end if
                    ! scale to avoid over/underflow
                    acoefa = abs( acoef )
                    bcoefa = abs( bcoefr ) + abs( bcoefi )
                    scale = one
                    if( acoefa*ulp<safmin .and. acoefa>=safmin )scale = ( safmin / ulp ) / &
                              acoefa
                    if( bcoefa*ulp<safmin .and. bcoefa>=safmin )scale = max( scale, ( safmin / &
                              ulp ) / bcoefa )
                    if( safmin*acoefa>ascale )scale = ascale / ( safmin*acoefa )
                    if( safmin*bcoefa>bscale )scale = min( scale, bscale / ( safmin*bcoefa ) )
                              
                    if( scale/=one ) then
                       acoef = scale*acoef
                       acoefa = abs( acoef )
                       bcoefr = scale*bcoefr
                       bcoefi = scale*bcoefi
                       bcoefa = abs( bcoefr ) + abs( bcoefi )
                    end if
                    ! compute first two components of eigenvector
                    ! and contribution to sums
                    temp = acoef*s( je, je-1 )
                    temp2r = acoef*s( je, je ) - bcoefr*p( je, je )
                    temp2i = -bcoefi*p( je, je )
                    if( abs( temp )>=abs( temp2r )+abs( temp2i ) ) then
                       work( 2_${ik}$*n+je ) = one
                       work( 3_${ik}$*n+je ) = zero
                       work( 2_${ik}$*n+je-1 ) = -temp2r / temp
                       work( 3_${ik}$*n+je-1 ) = -temp2i / temp
                    else
                       work( 2_${ik}$*n+je-1 ) = one
                       work( 3_${ik}$*n+je-1 ) = zero
                       temp = acoef*s( je-1, je )
                       work( 2_${ik}$*n+je ) = ( bcoefr*p( je-1, je-1 )-acoef*s( je-1, je-1 ) ) / &
                                 temp
                       work( 3_${ik}$*n+je ) = bcoefi*p( je-1, je-1 ) / temp
                    end if
                    xmax = max( abs( work( 2_${ik}$*n+je ) )+abs( work( 3_${ik}$*n+je ) ),abs( work( 2_${ik}$*n+je-1 ) &
                              )+abs( work( 3_${ik}$*n+je-1 ) ) )
                    ! compute contribution from columns je and je-1
                    ! of a and b to the sums.
                    creala = acoef*work( 2_${ik}$*n+je-1 )
                    cimaga = acoef*work( 3_${ik}$*n+je-1 )
                    crealb = bcoefr*work( 2_${ik}$*n+je-1 ) -bcoefi*work( 3_${ik}$*n+je-1 )
                    cimagb = bcoefi*work( 2_${ik}$*n+je-1 ) +bcoefr*work( 3_${ik}$*n+je-1 )
                    cre2a = acoef*work( 2_${ik}$*n+je )
                    cim2a = acoef*work( 3_${ik}$*n+je )
                    cre2b = bcoefr*work( 2_${ik}$*n+je ) - bcoefi*work( 3_${ik}$*n+je )
                    cim2b = bcoefi*work( 2_${ik}$*n+je ) + bcoefr*work( 3_${ik}$*n+je )
                    do jr = 1, je - 2
                       work( 2_${ik}$*n+jr ) = -creala*s( jr, je-1 ) +crealb*p( jr, je-1 ) -cre2a*s( jr, &
                                 je ) + cre2b*p( jr, je )
                       work( 3_${ik}$*n+jr ) = -cimaga*s( jr, je-1 ) +cimagb*p( jr, je-1 ) -cim2a*s( jr, &
                                 je ) + cim2b*p( jr, je )
                    end do
                 end if
                 dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                 ! columnwise triangular solve of  (a a - b b)  x = 0
                 il2by2 = .false.
                 loop_370: do j = je - nw, 1, -1
                    ! if a 2-by-2 block, is in position j-1:j, wait until
                    ! next iteration to process it (when it will be j:j+1)
                    if( .not.il2by2 .and. j>1_${ik}$ ) then
                       if( s( j, j-1 )/=zero ) then
                          il2by2 = .true.
                          cycle loop_370
                       end if
                    end if
                    bdiag( 1_${ik}$ ) = p( j, j )
                    if( il2by2 ) then
                       na = 2_${ik}$
                       bdiag( 2_${ik}$ ) = p( j+1, j+1 )
                    else
                       na = 1_${ik}$
                    end if
                    ! compute x(j) (and x(j+1), if 2-by-2 block)
                    call stdlib${ii}$_${ri}$laln2( .false., na, nw, dmin, acoef, s( j, j ),lds, bdiag( 1_${ik}$ ), &
                    bdiag( 2_${ik}$ ), work( 2_${ik}$*n+j ),n, bcoefr, bcoefi, sum, 2_${ik}$, scale, temp,iinfo )
                              
                    if( scale<one ) then
                       do jw = 0, nw - 1
                          do jr = 1, je
                             work( ( jw+2 )*n+jr ) = scale*work( ( jw+2 )*n+jr )
                          end do
                       end do
                    end if
                    xmax = max( scale*xmax, temp )
                    do jw = 1, nw
                       do ja = 1, na
                          work( ( jw+1 )*n+j+ja-1 ) = sum( ja, jw )
                       end do
                    end do
                    ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                    if( j>1_${ik}$ ) then
                       ! check whether scaling is necessary for sum.
                       xscale = one / max( one, xmax )
                       temp = acoefa*work( j ) + bcoefa*work( n+j )
                       if( il2by2 )temp = max( temp, acoefa*work( j+1 )+bcoefa*work( n+j+1 ) )
                                 
                       temp = max( temp, acoefa, bcoefa )
                       if( temp>bignum*xscale ) then
                          do jw = 0, nw - 1
                             do jr = 1, je
                                work( ( jw+2 )*n+jr ) = xscale*work( ( jw+2 )*n+jr )
                             end do
                          end do
                          xmax = xmax*xscale
                       end if
                       ! compute the contributions of the off-diagonals of
                       ! column j (and j+1, if 2-by-2 block) of a and b to the
                       ! sums.
                       do ja = 1, na
                          if( ilcplx ) then
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             cimaga = acoef*work( 3_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 ) -bcoefi*work( 3_${ik}$*n+j+ja-1 )
                             cimagb = bcoefi*work( 2_${ik}$*n+j+ja-1 ) +bcoefr*work( 3_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                                work( 3_${ik}$*n+jr ) = work( 3_${ik}$*n+jr ) -cimaga*s( jr, j+ja-1 ) +cimagb*p(&
                                           jr, j+ja-1 )
                             end do
                          else
                             creala = acoef*work( 2_${ik}$*n+j+ja-1 )
                             crealb = bcoefr*work( 2_${ik}$*n+j+ja-1 )
                             do jr = 1, j - 1
                                work( 2_${ik}$*n+jr ) = work( 2_${ik}$*n+jr ) -creala*s( jr, j+ja-1 ) +crealb*p(&
                                           jr, j+ja-1 )
                             end do
                          end if
                       end do
                    end if
                    il2by2 = .false.
                 end do loop_370
                 ! copy eigenvector to vr, back transforming if
                 ! howmny='b'.
                 ieig = ieig - nw
                 if( ilback ) then
                    do jw = 0, nw - 1
                       do jr = 1, n
                          work( ( jw+4 )*n+jr ) = work( ( jw+2 )*n+1 )*vr( jr, 1_${ik}$ )
                       end do
                       ! a series of compiler directives to defeat
                       ! vectorization for the next loop
                       do jc = 2, je
                          do jr = 1, n
                             work( ( jw+4 )*n+jr ) = work( ( jw+4 )*n+jr ) +work( ( jw+2 )*n+jc )&
                                       *vr( jr, jc )
                          end do
                       end do
                    end do
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+4 )*n+jr )
                       end do
                    end do
                    iend = n
                 else
                    do jw = 0, nw - 1
                       do jr = 1, n
                          vr( jr, ieig+jw ) = work( ( jw+2 )*n+jr )
                       end do
                    end do
                    iend = je
                 end if
                 ! scale eigenvector
                 xmax = zero
                 if( ilcplx ) then
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) )+abs( vr( j, ieig+1 ) ) )
                    end do
                 else
                    do j = 1, iend
                       xmax = max( xmax, abs( vr( j, ieig ) ) )
                    end do
                 end if
                 if( xmax>safmin ) then
                    xscale = one / xmax
                    do jw = 0, nw - 1
                       do jr = 1, iend
                          vr( jr, ieig+jw ) = xscale*vr( jr, ieig+jw )
                       end do
                    end do
                 end if
              end do loop_500
           end if
           return
     end subroutine stdlib${ii}$_${ri}$tgevc

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! CTGEVC computes some or all of the right and/or left eigenvectors of
     !! a pair of complex matrices (S,P), where S and P are upper triangular.
     !! Matrix pairs of this type are produced by the generalized Schur
     !! factorization of a complex matrix pair (A,B):
     !! A = Q*S*Z**H,  B = Q*P*Z**H
     !! as computed by CGGHRD + CHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal elements of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the unitary factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(sp), intent(out) :: rwork(*)
           complex(sp), intent(in) :: p(ldp,*), s(lds,*)
           complex(sp), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           complex(sp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr
           real(sp) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, &
                     safmin, sbeta, scale, small, temp, ulp, xmax
           complex(sp) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x
           ! Intrinsic Functions 
           ! Statement Functions 
           real(sp) :: abs1
           ! Statement Function Definitions 
           abs1( x ) = abs( real( x,KIND=sp) ) + abs( aimag( x ) )
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGEVC', -info )
              return
           end if
           ! count the number of eigenvectors
           if( .not.ilall ) then
              im = 0_${ik}$
              do j = 1, n
                 if( select( j ) )im = im + 1_${ik}$
              end do
           else
              im = n
           end if
           ! check diagonal of b
           ilbbad = .false.
           do j = 1, n
              if( aimag( p( j, j ) )/=zero )ilbbad = .true.
           end do
           if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_slamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_slabad( safmin, big )
           ulp = stdlib${ii}$_slamch( 'EPSILON' )*stdlib${ii}$_slamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part of a and b to check for possible overflow in the triangular
           ! solver.
           anorm = abs1( s( 1_${ik}$, 1_${ik}$ ) )
           bnorm = abs1( p( 1_${ik}$, 1_${ik}$ ) )
           rwork( 1_${ik}$ ) = zero
           rwork( n+1 ) = zero
           do j = 2, n
              rwork( j ) = zero
              rwork( n+j ) = zero
              do i = 1, j - 1
                 rwork( j ) = rwork( j ) + abs1( s( i, j ) )
                 rwork( n+j ) = rwork( n+j ) + abs1( p( i, j ) )
              end do
              anorm = max( anorm, rwork( j )+abs1( s( j, j ) ) )
              bnorm = max( bnorm, rwork( n+j )+abs1( p( j, j ) ) )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              loop_140: do je = 1, n
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig + 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=sp) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vl( jr, ieig ) = czero
                       end do
                       vl( ieig, ieig ) = cone
                       cycle loop_140
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                         ! h
                       ! y  ( a a - b b ) = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=sp) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=sp) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                    ! h
                    ! triangular solve of  (a a - b b)  y = 0
                                            ! h
                    ! (rowwise in  (a a - b b) , or columnwise in a a - b b)
                    loop_100: do j = je + 1, n
                       ! compute
                             ! j-1
                       ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                             ! k=je
                       ! (scale if necessary)
                       temp = one / xmax
                       if( acoefa*rwork( j )+bcoefa*rwork( n+j )>bignum*temp ) then
                          do jr = je, j - 1
                             work( jr ) = temp*work( jr )
                          end do
                          xmax = one
                       end if
                       suma = czero
                       sumb = czero
                       do jr = je, j - 1
                          suma = suma + conjg( s( jr, j ) )*work( jr )
                          sumb = sumb + conjg( p( jr, j ) )*work( jr )
                       end do
                       sum = acoeff*suma - conjg( bcoeff )*sumb
                       ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) )
                       ! with scaling and perturbation of the denominator
                       d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=sp)
                       if( abs1( d )<one ) then
                          if( abs1( sum )>=bignum*abs1( d ) ) then
                             temp = one / abs1( sum )
                             do jr = je, j - 1
                                work( jr ) = temp*work( jr )
                             end do
                             xmax = temp*xmax
                             sum = temp*sum
                          end if
                       end if
                       work( j ) = stdlib${ii}$_cladiv( -sum, d )
                       xmax = max( xmax, abs1( work( j ) ) )
                    end do loop_100
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_cgemv( 'N', n, n+1-je, cone, vl( 1_${ik}$, je ), ldvl,work( je ), 1_${ik}$, &
                                 czero, work( n+1 ), 1_${ik}$ )
                       isrc = 2_${ik}$
                       ibeg = 1_${ik}$
                    else
                       isrc = 1_${ik}$
                       ibeg = je
                    end if
                    ! copy and scale eigenvector into column of vl
                    xmax = zero
                    do jr = ibeg, n
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = ibeg, n
                          vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       ibeg = n + 1_${ik}$
                    end if
                    do jr = 1, ibeg - 1
                       vl( jr, ieig ) = czero
                    end do
                 end if
              end do loop_140
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              loop_250: do je = n, 1, -1
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig - 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=sp) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vr( jr, ieig ) = czero
                       end do
                       vr( ieig, ieig ) = cone
                       cycle loop_250
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                    ! ( a a - b b ) x  = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=sp) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=sp) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                    ! triangular solve of  (a a - b b) x = 0  (columnwise)
                    ! work(1:j-1) contains sums w,
                    ! work(j+1:je) contains x
                    do jr = 1, je - 1
                       work( jr ) = acoeff*s( jr, je ) - bcoeff*p( jr, je )
                    end do
                    work( je ) = cone
                    loop_210: do j = je - 1, 1, -1
                       ! form x(j) := - w(j) / d
                       ! with scaling and perturbation of the denominator
                       d = acoeff*s( j, j ) - bcoeff*p( j, j )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=sp)
                       if( abs1( d )<one ) then
                          if( abs1( work( j ) )>=bignum*abs1( d ) ) then
                             temp = one / abs1( work( j ) )
                             do jr = 1, je
                                work( jr ) = temp*work( jr )
                             end do
                          end if
                       end if
                       work( j ) = stdlib${ii}$_cladiv( -work( j ), d )
                       if( j>1_${ik}$ ) then
                          ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                          if( abs1( work( j ) )>one ) then
                             temp = one / abs1( work( j ) )
                             if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then
                                do jr = 1, je
                                   work( jr ) = temp*work( jr )
                                end do
                             end if
                          end if
                          ca = acoeff*work( j )
                          cb = bcoeff*work( j )
                          do jr = 1, j - 1
                             work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j )
                          end do
                       end if
                    end do loop_210
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_cgemv( 'N', n, je, cone, vr, ldvr, work, 1_${ik}$,czero, work( n+1 ), &
                                 1_${ik}$ )
                       isrc = 2_${ik}$
                       iend = n
                    else
                       isrc = 1_${ik}$
                       iend = je
                    end if
                    ! copy and scale eigenvector into column of vr
                    xmax = zero
                    do jr = 1, iend
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = 1, iend
                          vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       iend = 0_${ik}$
                    end if
                    do jr = iend + 1, n
                       vr( jr, ieig ) = czero
                    end do
                 end if
              end do loop_250
           end if
           return
     end subroutine stdlib${ii}$_ctgevc

     pure module subroutine stdlib${ii}$_ztgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! ZTGEVC computes some or all of the right and/or left eigenvectors of
     !! a pair of complex matrices (S,P), where S and P are upper triangular.
     !! Matrix pairs of this type are produced by the generalized Schur
     !! factorization of a complex matrix pair (A,B):
     !! A = Q*S*Z**H,  B = Q*P*Z**H
     !! as computed by ZGGHRD + ZHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal elements of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the unitary factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(dp), intent(out) :: rwork(*)
           complex(dp), intent(in) :: p(ldp,*), s(lds,*)
           complex(dp), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           complex(dp), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr
           real(dp) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, &
                     safmin, sbeta, scale, small, temp, ulp, xmax
           complex(dp) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x
           ! Intrinsic Functions 
           ! Statement Functions 
           real(dp) :: abs1
           ! Statement Function Definitions 
           abs1( x ) = abs( real( x,KIND=dp) ) + abs( aimag( x ) )
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEVC', -info )
              return
           end if
           ! count the number of eigenvectors
           if( .not.ilall ) then
              im = 0_${ik}$
              do j = 1, n
                 if( select( j ) )im = im + 1_${ik}$
              end do
           else
              im = n
           end if
           ! check diagonal of b
           ilbbad = .false.
           do j = 1, n
              if( aimag( p( j, j ) )/=zero )ilbbad = .true.
           end do
           if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_dlamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_dlabad( safmin, big )
           ulp = stdlib${ii}$_dlamch( 'EPSILON' )*stdlib${ii}$_dlamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part of a and b to check for possible overflow in the triangular
           ! solver.
           anorm = abs1( s( 1_${ik}$, 1_${ik}$ ) )
           bnorm = abs1( p( 1_${ik}$, 1_${ik}$ ) )
           rwork( 1_${ik}$ ) = zero
           rwork( n+1 ) = zero
           do j = 2, n
              rwork( j ) = zero
              rwork( n+j ) = zero
              do i = 1, j - 1
                 rwork( j ) = rwork( j ) + abs1( s( i, j ) )
                 rwork( n+j ) = rwork( n+j ) + abs1( p( i, j ) )
              end do
              anorm = max( anorm, rwork( j )+abs1( s( j, j ) ) )
              bnorm = max( bnorm, rwork( n+j )+abs1( p( j, j ) ) )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              loop_140: do je = 1, n
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig + 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=dp) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vl( jr, ieig ) = czero
                       end do
                       vl( ieig, ieig ) = cone
                       cycle loop_140
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                         ! h
                       ! y  ( a a - b b ) = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=dp) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=dp) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                    ! h
                    ! triangular solve of  (a a - b b)  y = 0
                                            ! h
                    ! (rowwise in  (a a - b b) , or columnwise in a a - b b)
                    loop_100: do j = je + 1, n
                       ! compute
                             ! j-1
                       ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                             ! k=je
                       ! (scale if necessary)
                       temp = one / xmax
                       if( acoefa*rwork( j )+bcoefa*rwork( n+j )>bignum*temp ) then
                          do jr = je, j - 1
                             work( jr ) = temp*work( jr )
                          end do
                          xmax = one
                       end if
                       suma = czero
                       sumb = czero
                       do jr = je, j - 1
                          suma = suma + conjg( s( jr, j ) )*work( jr )
                          sumb = sumb + conjg( p( jr, j ) )*work( jr )
                       end do
                       sum = acoeff*suma - conjg( bcoeff )*sumb
                       ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) )
                       ! with scaling and perturbation of the denominator
                       d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=dp)
                       if( abs1( d )<one ) then
                          if( abs1( sum )>=bignum*abs1( d ) ) then
                             temp = one / abs1( sum )
                             do jr = je, j - 1
                                work( jr ) = temp*work( jr )
                             end do
                             xmax = temp*xmax
                             sum = temp*sum
                          end if
                       end if
                       work( j ) = stdlib${ii}$_zladiv( -sum, d )
                       xmax = max( xmax, abs1( work( j ) ) )
                    end do loop_100
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_zgemv( 'N', n, n+1-je, cone, vl( 1_${ik}$, je ), ldvl,work( je ), 1_${ik}$, &
                                 czero, work( n+1 ), 1_${ik}$ )
                       isrc = 2_${ik}$
                       ibeg = 1_${ik}$
                    else
                       isrc = 1_${ik}$
                       ibeg = je
                    end if
                    ! copy and scale eigenvector into column of vl
                    xmax = zero
                    do jr = ibeg, n
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = ibeg, n
                          vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       ibeg = n + 1_${ik}$
                    end if
                    do jr = 1, ibeg - 1
                       vl( jr, ieig ) = czero
                    end do
                 end if
              end do loop_140
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              loop_250: do je = n, 1, -1
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig - 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=dp) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vr( jr, ieig ) = czero
                       end do
                       vr( ieig, ieig ) = cone
                       cycle loop_250
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                    ! ( a a - b b ) x  = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=dp) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=dp) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                    ! triangular solve of  (a a - b b) x = 0  (columnwise)
                    ! work(1:j-1) contains sums w,
                    ! work(j+1:je) contains x
                    do jr = 1, je - 1
                       work( jr ) = acoeff*s( jr, je ) - bcoeff*p( jr, je )
                    end do
                    work( je ) = cone
                    loop_210: do j = je - 1, 1, -1
                       ! form x(j) := - w(j) / d
                       ! with scaling and perturbation of the denominator
                       d = acoeff*s( j, j ) - bcoeff*p( j, j )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=dp)
                       if( abs1( d )<one ) then
                          if( abs1( work( j ) )>=bignum*abs1( d ) ) then
                             temp = one / abs1( work( j ) )
                             do jr = 1, je
                                work( jr ) = temp*work( jr )
                             end do
                          end if
                       end if
                       work( j ) = stdlib${ii}$_zladiv( -work( j ), d )
                       if( j>1_${ik}$ ) then
                          ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                          if( abs1( work( j ) )>one ) then
                             temp = one / abs1( work( j ) )
                             if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then
                                do jr = 1, je
                                   work( jr ) = temp*work( jr )
                                end do
                             end if
                          end if
                          ca = acoeff*work( j )
                          cb = bcoeff*work( j )
                          do jr = 1, j - 1
                             work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j )
                          end do
                       end if
                    end do loop_210
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_zgemv( 'N', n, je, cone, vr, ldvr, work, 1_${ik}$,czero, work( n+1 ), &
                                 1_${ik}$ )
                       isrc = 2_${ik}$
                       iend = n
                    else
                       isrc = 1_${ik}$
                       iend = je
                    end if
                    ! copy and scale eigenvector into column of vr
                    xmax = zero
                    do jr = 1, iend
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = 1, iend
                          vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       iend = 0_${ik}$
                    end if
                    do jr = iend + 1, n
                       vr( jr, ieig ) = czero
                    end do
                 end if
              end do loop_250
           end if
           return
     end subroutine stdlib${ii}$_ztgevc

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgevc( side, howmny, select, n, s, lds, p, ldp, vl,ldvl, vr, ldvr, &
     !! ZTGEVC: computes some or all of the right and/or left eigenvectors of
     !! a pair of complex matrices (S,P), where S and P are upper triangular.
     !! Matrix pairs of this type are produced by the generalized Schur
     !! factorization of a complex matrix pair (A,B):
     !! A = Q*S*Z**H,  B = Q*P*Z**H
     !! as computed by ZGGHRD + ZHGEQZ.
     !! The right eigenvector x and the left eigenvector y of (S,P)
     !! corresponding to an eigenvalue w are defined by:
     !! S*x = w*P*x,  (y**H)*S = w*(y**H)*P,
     !! where y**H denotes the conjugate tranpose of y.
     !! The eigenvalues are not input to this routine, but are computed
     !! directly from the diagonal elements of S and P.
     !! This routine returns the matrices X and/or Y of right and left
     !! eigenvectors of (S,P), or the products Z*X and/or Q*Y,
     !! where Z and Q are input matrices.
     !! If Q and Z are the unitary factors from the generalized Schur
     !! factorization of a matrix pair (A,B), then Z*X and Q*Y
     !! are the matrices of right and left eigenvectors of (A,B).
               mm, m, work, rwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: howmny, side
           integer(${ik}$), intent(out) :: info, m
           integer(${ik}$), intent(in) :: ldp, lds, ldvl, ldvr, mm, n
           ! Array Arguments 
           logical(lk), intent(in) :: select(*)
           real(${ck}$), intent(out) :: rwork(*)
           complex(${ck}$), intent(in) :: p(ldp,*), s(lds,*)
           complex(${ck}$), intent(inout) :: vl(ldvl,*), vr(ldvr,*)
           complex(${ck}$), intent(out) :: work(*)
        ! =====================================================================
           
           
           ! Local Scalars 
           logical(lk) :: compl, compr, ilall, ilback, ilbbad, ilcomp, lsa, lsb
           integer(${ik}$) :: i, ibeg, ieig, iend, ihwmny, im, iside, isrc, j, je, jr
           real(${ck}$) :: acoefa, acoeff, anorm, ascale, bcoefa, big, bignum, bnorm, bscale, dmin, &
                     safmin, sbeta, scale, small, temp, ulp, xmax
           complex(${ck}$) :: bcoeff, ca, cb, d, salpha, sum, suma, sumb, x
           ! Intrinsic Functions 
           ! Statement Functions 
           real(${ck}$) :: abs1
           ! Statement Function Definitions 
           abs1( x ) = abs( real( x,KIND=${ck}$) ) + abs( aimag( x ) )
           ! Executable Statements 
           ! decode and test the input parameters
           if( stdlib_lsame( howmny, 'A' ) ) then
              ihwmny = 1_${ik}$
              ilall = .true.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'S' ) ) then
              ihwmny = 2_${ik}$
              ilall = .false.
              ilback = .false.
           else if( stdlib_lsame( howmny, 'B' ) ) then
              ihwmny = 3_${ik}$
              ilall = .true.
              ilback = .true.
           else
              ihwmny = -1_${ik}$
           end if
           if( stdlib_lsame( side, 'R' ) ) then
              iside = 1_${ik}$
              compl = .false.
              compr = .true.
           else if( stdlib_lsame( side, 'L' ) ) then
              iside = 2_${ik}$
              compl = .true.
              compr = .false.
           else if( stdlib_lsame( side, 'B' ) ) then
              iside = 3_${ik}$
              compl = .true.
              compr = .true.
           else
              iside = -1_${ik}$
           end if
           info = 0_${ik}$
           if( iside<0_${ik}$ ) then
              info = -1_${ik}$
           else if( ihwmny<0_${ik}$ ) then
              info = -2_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lds<max( 1_${ik}$, n ) ) then
              info = -6_${ik}$
           else if( ldp<max( 1_${ik}$, n ) ) then
              info = -8_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEVC', -info )
              return
           end if
           ! count the number of eigenvectors
           if( .not.ilall ) then
              im = 0_${ik}$
              do j = 1, n
                 if( select( j ) )im = im + 1_${ik}$
              end do
           else
              im = n
           end if
           ! check diagonal of b
           ilbbad = .false.
           do j = 1, n
              if( aimag( p( j, j ) )/=zero )ilbbad = .true.
           end do
           if( ilbbad ) then
              info = -7_${ik}$
           else if( compl .and. ldvl<n .or. ldvl<1_${ik}$ ) then
              info = -10_${ik}$
           else if( compr .and. ldvr<n .or. ldvr<1_${ik}$ ) then
              info = -12_${ik}$
           else if( mm<im ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEVC', -info )
              return
           end if
           ! quick return if possible
           m = im
           if( n==0 )return
           ! machine constants
           safmin = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' )
           big = one / safmin
           call stdlib${ii}$_${c2ri(ci)}$labad( safmin, big )
           ulp = stdlib${ii}$_${c2ri(ci)}$lamch( 'EPSILON' )*stdlib${ii}$_${c2ri(ci)}$lamch( 'BASE' )
           small = safmin*n / ulp
           big = one / small
           bignum = one / ( safmin*n )
           ! compute the 1-norm of each column of the strictly upper triangular
           ! part of a and b to check for possible overflow in the triangular
           ! solver.
           anorm = abs1( s( 1_${ik}$, 1_${ik}$ ) )
           bnorm = abs1( p( 1_${ik}$, 1_${ik}$ ) )
           rwork( 1_${ik}$ ) = zero
           rwork( n+1 ) = zero
           do j = 2, n
              rwork( j ) = zero
              rwork( n+j ) = zero
              do i = 1, j - 1
                 rwork( j ) = rwork( j ) + abs1( s( i, j ) )
                 rwork( n+j ) = rwork( n+j ) + abs1( p( i, j ) )
              end do
              anorm = max( anorm, rwork( j )+abs1( s( j, j ) ) )
              bnorm = max( bnorm, rwork( n+j )+abs1( p( j, j ) ) )
           end do
           ascale = one / max( anorm, safmin )
           bscale = one / max( bnorm, safmin )
           ! left eigenvectors
           if( compl ) then
              ieig = 0_${ik}$
              ! main loop over eigenvalues
              loop_140: do je = 1, n
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig + 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=${ck}$) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vl( jr, ieig ) = czero
                       end do
                       vl( ieig, ieig ) = cone
                       cycle loop_140
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                         ! h
                       ! y  ( a a - b b ) = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=${ck}$) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=${ck}$) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                                                    ! h
                    ! triangular solve of  (a a - b b)  y = 0
                                            ! h
                    ! (rowwise in  (a a - b b) , or columnwise in a a - b b)
                    loop_100: do j = je + 1, n
                       ! compute
                             ! j-1
                       ! sum = sum  conjg( a*s(k,j) - b*p(k,j) )*x(k)
                             ! k=je
                       ! (scale if necessary)
                       temp = one / xmax
                       if( acoefa*rwork( j )+bcoefa*rwork( n+j )>bignum*temp ) then
                          do jr = je, j - 1
                             work( jr ) = temp*work( jr )
                          end do
                          xmax = one
                       end if
                       suma = czero
                       sumb = czero
                       do jr = je, j - 1
                          suma = suma + conjg( s( jr, j ) )*work( jr )
                          sumb = sumb + conjg( p( jr, j ) )*work( jr )
                       end do
                       sum = acoeff*suma - conjg( bcoeff )*sumb
                       ! form x(j) = - sum / conjg( a*s(j,j) - b*p(j,j) )
                       ! with scaling and perturbation of the denominator
                       d = conjg( acoeff*s( j, j )-bcoeff*p( j, j ) )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=${ck}$)
                       if( abs1( d )<one ) then
                          if( abs1( sum )>=bignum*abs1( d ) ) then
                             temp = one / abs1( sum )
                             do jr = je, j - 1
                                work( jr ) = temp*work( jr )
                             end do
                             xmax = temp*xmax
                             sum = temp*sum
                          end if
                       end if
                       work( j ) = stdlib${ii}$_${ci}$ladiv( -sum, d )
                       xmax = max( xmax, abs1( work( j ) ) )
                    end do loop_100
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_${ci}$gemv( 'N', n, n+1-je, cone, vl( 1_${ik}$, je ), ldvl,work( je ), 1_${ik}$, &
                                 czero, work( n+1 ), 1_${ik}$ )
                       isrc = 2_${ik}$
                       ibeg = 1_${ik}$
                    else
                       isrc = 1_${ik}$
                       ibeg = je
                    end if
                    ! copy and scale eigenvector into column of vl
                    xmax = zero
                    do jr = ibeg, n
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = ibeg, n
                          vl( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       ibeg = n + 1_${ik}$
                    end if
                    do jr = 1, ibeg - 1
                       vl( jr, ieig ) = czero
                    end do
                 end if
              end do loop_140
           end if
           ! right eigenvectors
           if( compr ) then
              ieig = im + 1_${ik}$
              ! main loop over eigenvalues
              loop_250: do je = n, 1, -1
                 if( ilall ) then
                    ilcomp = .true.
                 else
                    ilcomp = select( je )
                 end if
                 if( ilcomp ) then
                    ieig = ieig - 1_${ik}$
                    if( abs1( s( je, je ) )<=safmin .and.abs( real( p( je, je ),KIND=${ck}$) )&
                              <=safmin ) then
                       ! singular matrix pencil -- return unit eigenvector
                       do jr = 1, n
                          vr( jr, ieig ) = czero
                       end do
                       vr( ieig, ieig ) = cone
                       cycle loop_250
                    end if
                    ! non-singular eigenvalue:
                    ! compute coefficients  a  and  b  in
                    ! ( a a - b b ) x  = 0
                    temp = one / max( abs1( s( je, je ) )*ascale,abs( real( p( je, je ),KIND=${ck}$) )&
                              *bscale, safmin )
                    salpha = ( temp*s( je, je ) )*ascale
                    sbeta = ( temp*real( p( je, je ),KIND=${ck}$) )*bscale
                    acoeff = sbeta*ascale
                    bcoeff = salpha*bscale
                    ! scale to avoid underflow
                    lsa = abs( sbeta )>=safmin .and. abs( acoeff )<small
                    lsb = abs1( salpha )>=safmin .and. abs1( bcoeff )<small
                    scale = one
                    if( lsa )scale = ( small / abs( sbeta ) )*min( anorm, big )
                    if( lsb )scale = max( scale, ( small / abs1( salpha ) )*min( bnorm, big ) )
                              
                    if( lsa .or. lsb ) then
                       scale = min( scale, one /( safmin*max( one, abs( acoeff ),abs1( bcoeff ) ) &
                                 ) )
                       if( lsa ) then
                          acoeff = ascale*( scale*sbeta )
                       else
                          acoeff = scale*acoeff
                       end if
                       if( lsb ) then
                          bcoeff = bscale*( scale*salpha )
                       else
                          bcoeff = scale*bcoeff
                       end if
                    end if
                    acoefa = abs( acoeff )
                    bcoefa = abs1( bcoeff )
                    xmax = one
                    do jr = 1, n
                       work( jr ) = czero
                    end do
                    work( je ) = cone
                    dmin = max( ulp*acoefa*anorm, ulp*bcoefa*bnorm, safmin )
                    ! triangular solve of  (a a - b b) x = 0  (columnwise)
                    ! work(1:j-1) contains sums w,
                    ! work(j+1:je) contains x
                    do jr = 1, je - 1
                       work( jr ) = acoeff*s( jr, je ) - bcoeff*p( jr, je )
                    end do
                    work( je ) = cone
                    loop_210: do j = je - 1, 1, -1
                       ! form x(j) := - w(j) / d
                       ! with scaling and perturbation of the denominator
                       d = acoeff*s( j, j ) - bcoeff*p( j, j )
                       if( abs1( d )<=dmin )d = cmplx( dmin,KIND=${ck}$)
                       if( abs1( d )<one ) then
                          if( abs1( work( j ) )>=bignum*abs1( d ) ) then
                             temp = one / abs1( work( j ) )
                             do jr = 1, je
                                work( jr ) = temp*work( jr )
                             end do
                          end if
                       end if
                       work( j ) = stdlib${ii}$_${ci}$ladiv( -work( j ), d )
                       if( j>1_${ik}$ ) then
                          ! w = w + x(j)*(a s(*,j) - b p(*,j) ) with scaling
                          if( abs1( work( j ) )>one ) then
                             temp = one / abs1( work( j ) )
                             if( acoefa*rwork( j )+bcoefa*rwork( n+j )>=bignum*temp ) then
                                do jr = 1, je
                                   work( jr ) = temp*work( jr )
                                end do
                             end if
                          end if
                          ca = acoeff*work( j )
                          cb = bcoeff*work( j )
                          do jr = 1, j - 1
                             work( jr ) = work( jr ) + ca*s( jr, j ) -cb*p( jr, j )
                          end do
                       end if
                    end do loop_210
                    ! back transform eigenvector if howmny='b'.
                    if( ilback ) then
                       call stdlib${ii}$_${ci}$gemv( 'N', n, je, cone, vr, ldvr, work, 1_${ik}$,czero, work( n+1 ), &
                                 1_${ik}$ )
                       isrc = 2_${ik}$
                       iend = n
                    else
                       isrc = 1_${ik}$
                       iend = je
                    end if
                    ! copy and scale eigenvector into column of vr
                    xmax = zero
                    do jr = 1, iend
                       xmax = max( xmax, abs1( work( ( isrc-1 )*n+jr ) ) )
                    end do
                    if( xmax>safmin ) then
                       temp = one / xmax
                       do jr = 1, iend
                          vr( jr, ieig ) = temp*work( ( isrc-1 )*n+jr )
                       end do
                    else
                       iend = 0_${ik}$
                    end if
                    do jr = iend + 1, n
                       vr( jr, ieig ) = czero
                    end do
                 end if
              end do loop_250
           end if
           return
     end subroutine stdlib${ii}$_${ci}$tgevc

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! STGEXC reorders the generalized real Schur decomposition of a real
     !! matrix pair (A,B) using an orthogonal equivalence transformation
     !! (A, B) = Q * (A, B) * Z**T,
     !! so that the diagonal block of (A, B) with row index IFST is moved
     !! to row ILST.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(inout) :: ifst, ilst
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldq, ldz, lwork, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: here, lwmin, nbf, nbl, nbnext
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( n<=1_${ik}$ ) then
                 lwmin = 1_${ik}$
              else
                 lwmin = 4_${ik}$*n + 16_${ik}$
              end if
              work(1_${ik}$) = lwmin
              if (lwork<lwmin .and. .not.lquery) then
                 info = -15_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'STGEXC', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           ! determine the first row of the specified block and find out
           ! if it is 1-by-1 or 2-by-2.
           if( ifst>1_${ik}$ ) then
              if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1_${ik}$
           end if
           nbf = 1_${ik}$
           if( ifst<n ) then
              if( a( ifst+1, ifst )/=zero )nbf = 2_${ik}$
           end if
           ! determine the first row of the final block
           ! and find out if it is 1-by-1 or 2-by-2.
           if( ilst>1_${ik}$ ) then
              if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1_${ik}$
           end if
           nbl = 1_${ik}$
           if( ilst<n ) then
              if( a( ilst+1, ilst )/=zero )nbl = 2_${ik}$
           end if
           if( ifst==ilst )return
           if( ifst<ilst ) then
              ! update ilst.
              if( nbf==2_${ik}$ .and. nbl==1_${ik}$ )ilst = ilst - 1_${ik}$
              if( nbf==1_${ik}$ .and. nbl==2_${ik}$ )ilst = ilst + 1_${ik}$
              here = ifst
              10 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here+nbf+1<=n ) then
                    if( a( here+nbf+1, here+nbf )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, nbf, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here + nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here+3<=n ) then
                    if( a( here+3, here+2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here+1, 1_${ik}$, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, 1_${ik}$, &
                              1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here + 1_${ik}$
                 else
                    ! recompute nbnext in case of 2-by-2 split.
                    if( a( here+2, here+1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, nbnext, work, lwork,info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                    end if
                 end if
              end if
              if( here<ilst )go to 10
           else
              here = ifst
              20 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, nbf, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here - nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, 1_${ik}$, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, &
                              nbnext, 1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here - 1_${ik}$
                 else
                   ! recompute nbnext in case of 2-by-2 split.
                    if( a( here, here-1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,&
                                  2_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                       call stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                    end if
                 end if
              end if
              if( here>ilst )go to 20
           end if
           ilst = here
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_stgexc

     pure module subroutine stdlib${ii}$_dtgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! DTGEXC reorders the generalized real Schur decomposition of a real
     !! matrix pair (A,B) using an orthogonal equivalence transformation
     !! (A, B) = Q * (A, B) * Z**T,
     !! so that the diagonal block of (A, B) with row index IFST is moved
     !! to row ILST.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(inout) :: ifst, ilst
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldq, ldz, lwork, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: here, lwmin, nbf, nbl, nbnext
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( n<=1_${ik}$ ) then
                 lwmin = 1_${ik}$
              else
                 lwmin = 4_${ik}$*n + 16_${ik}$
              end if
              work(1_${ik}$) = lwmin
              if (lwork<lwmin .and. .not.lquery) then
                 info = -15_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEXC', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           ! determine the first row of the specified block and find out
           ! if it is 1-by-1 or 2-by-2.
           if( ifst>1_${ik}$ ) then
              if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1_${ik}$
           end if
           nbf = 1_${ik}$
           if( ifst<n ) then
              if( a( ifst+1, ifst )/=zero )nbf = 2_${ik}$
           end if
           ! determine the first row of the final block
           ! and find out if it is 1-by-1 or 2-by-2.
           if( ilst>1_${ik}$ ) then
              if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1_${ik}$
           end if
           nbl = 1_${ik}$
           if( ilst<n ) then
              if( a( ilst+1, ilst )/=zero )nbl = 2_${ik}$
           end if
           if( ifst==ilst )return
           if( ifst<ilst ) then
              ! update ilst.
              if( nbf==2_${ik}$ .and. nbl==1_${ik}$ )ilst = ilst - 1_${ik}$
              if( nbf==1_${ik}$ .and. nbl==2_${ik}$ )ilst = ilst + 1_${ik}$
              here = ifst
              10 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here+nbf+1<=n ) then
                    if( a( here+nbf+1, here+nbf )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, nbf, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here + nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here+3<=n ) then
                    if( a( here+3, here+2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here+1, 1_${ik}$, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, 1_${ik}$, &
                              1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here + 1_${ik}$
                 else
                    ! recompute nbnext in case of 2-by-2 split.
                    if( a( here+2, here+1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, nbnext, work, lwork,info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                    end if
                 end if
              end if
              if( here<ilst )go to 10
           else
              here = ifst
              20 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, nbf, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here - nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, 1_${ik}$, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, &
                              nbnext, 1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here - 1_${ik}$
                 else
                   ! recompute nbnext in case of 2-by-2 split.
                    if( a( here, here-1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,&
                                  2_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                       call stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                    end if
                 end if
              end if
              if( here>ilst )go to 20
           end if
           ilst = here
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_dtgexc

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! DTGEXC: reorders the generalized real Schur decomposition of a real
     !! matrix pair (A,B) using an orthogonal equivalence transformation
     !! (A, B) = Q * (A, B) * Z**T,
     !! so that the diagonal block of (A, B) with row index IFST is moved
     !! to row ILST.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(inout) :: ifst, ilst
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldb, ldq, ldz, lwork, n
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery
           integer(${ik}$) :: here, lwmin, nbf, nbl, nbnext
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           lquery = ( lwork==-1_${ik}$ )
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info==0_${ik}$ ) then
              if( n<=1_${ik}$ ) then
                 lwmin = 1_${ik}$
              else
                 lwmin = 4_${ik}$*n + 16_${ik}$
              end if
              work(1_${ik}$) = lwmin
              if (lwork<lwmin .and. .not.lquery) then
                 info = -15_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'DTGEXC', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           ! determine the first row of the specified block and find out
           ! if it is 1-by-1 or 2-by-2.
           if( ifst>1_${ik}$ ) then
              if( a( ifst, ifst-1 )/=zero )ifst = ifst - 1_${ik}$
           end if
           nbf = 1_${ik}$
           if( ifst<n ) then
              if( a( ifst+1, ifst )/=zero )nbf = 2_${ik}$
           end if
           ! determine the first row of the final block
           ! and find out if it is 1-by-1 or 2-by-2.
           if( ilst>1_${ik}$ ) then
              if( a( ilst, ilst-1 )/=zero )ilst = ilst - 1_${ik}$
           end if
           nbl = 1_${ik}$
           if( ilst<n ) then
              if( a( ilst+1, ilst )/=zero )nbl = 2_${ik}$
           end if
           if( ifst==ilst )return
           if( ifst<ilst ) then
              ! update ilst.
              if( nbf==2_${ik}$ .and. nbl==1_${ik}$ )ilst = ilst - 1_${ik}$
              if( nbf==1_${ik}$ .and. nbl==2_${ik}$ )ilst = ilst + 1_${ik}$
              here = ifst
              10 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here+nbf+1<=n ) then
                    if( a( here+nbf+1, here+nbf )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, nbf, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here + nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here+3<=n ) then
                    if( a( here+3, here+2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here+1, 1_${ik}$, &
                           nbnext, work, lwork, info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, 1_${ik}$, &
                              1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here + 1_${ik}$
                 else
                    ! recompute nbnext in case of 2-by-2 split.
                    if( a( here+2, here+1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, nbnext, work, lwork,info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here + 1_${ik}$
                    end if
                 end if
              end if
              if( here<ilst )go to 10
           else
              here = ifst
              20 continue
              ! swap with next one below.
              if( nbf==1_${ik}$ .or. nbf==2_${ik}$ ) then
                 ! current block either 1-by-1 or 2-by-2.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, nbf, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 here = here - nbnext
                 ! test if 2-by-2 block breaks into two 1-by-1 blocks.
                 if( nbf==2_${ik}$ ) then
                    if( a( here+1, here )==zero )nbf = 3_${ik}$
                 end if
              else
                 ! current block consists of two 1-by-1 blocks, each of which
                 ! must be swapped individually.
                 nbnext = 1_${ik}$
                 if( here>=3_${ik}$ ) then
                    if( a( here-1, here-2 )/=zero )nbnext = 2_${ik}$
                 end if
                 call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here-nbnext, &
                           nbnext, 1_${ik}$, work, lwork,info )
                 if( info/=0_${ik}$ ) then
                    ilst = here
                    return
                 end if
                 if( nbnext==1_${ik}$ ) then
                    ! swap two 1-by-1 blocks.
                    call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, here, &
                              nbnext, 1_${ik}$, work, lwork, info )
                    if( info/=0_${ik}$ ) then
                       ilst = here
                       return
                    end if
                    here = here - 1_${ik}$
                 else
                   ! recompute nbnext in case of 2-by-2 split.
                    if( a( here, here-1 )==zero )nbnext = 1_${ik}$
                    if( nbnext==2_${ik}$ ) then
                       ! 2-by-2 block did not split.
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here-1,&
                                  2_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 2_${ik}$
                    else
                       ! 2-by-2 block did split.
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                       call stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq,z, ldz, here, &
                                 1_${ik}$, 1_${ik}$, work, lwork, info )
                       if( info/=0_${ik}$ ) then
                          ilst = here
                          return
                       end if
                       here = here - 1_${ik}$
                    end if
                 end if
              end if
              if( here>ilst )go to 20
           end if
           ilst = here
           work( 1_${ik}$ ) = lwmin
           return
     end subroutine stdlib${ii}$_${ri}$tgexc

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! CTGEXC reorders the generalized Schur decomposition of a complex
     !! matrix pair (A,B), using an unitary equivalence transformation
     !! (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with
     !! row index IFST is moved to row ILST.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ifst, lda, ldb, ldq, ldz, n
           integer(${ik}$), intent(inout) :: ilst
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: here
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'CTGEXC', -info )
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           if( ifst==ilst )return
           if( ifst<ilst ) then
              here = ifst
              10 continue
              ! swap with next one below
              call stdlib${ii}$_ctgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here + 1_${ik}$
              if( here<ilst )go to 10
              here = here - 1_${ik}$
           else
              here = ifst - 1_${ik}$
              20 continue
              ! swap with next one above
              call stdlib${ii}$_ctgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here - 1_${ik}$
              if( here>=ilst )go to 20
              here = here + 1_${ik}$
           end if
           ilst = here
           return
     end subroutine stdlib${ii}$_ctgexc

     pure module subroutine stdlib${ii}$_ztgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! ZTGEXC reorders the generalized Schur decomposition of a complex
     !! matrix pair (A,B), using an unitary equivalence transformation
     !! (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with
     !! row index IFST is moved to row ILST.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ifst, lda, ldb, ldq, ldz, n
           integer(${ik}$), intent(inout) :: ilst
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: here
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEXC', -info )
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           if( ifst==ilst )return
           if( ifst<ilst ) then
              here = ifst
              10 continue
              ! swap with next one below
              call stdlib${ii}$_ztgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here + 1_${ik}$
              if( here<ilst )go to 10
              here = here - 1_${ik}$
           else
              here = ifst - 1_${ik}$
              20 continue
              ! swap with next one above
              call stdlib${ii}$_ztgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here - 1_${ik}$
              if( here>=ilst )go to 20
              here = here + 1_${ik}$
           end if
           ilst = here
           return
     end subroutine stdlib${ii}$_ztgexc

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgexc( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, ifst, ilst, &
     !! ZTGEXC: reorders the generalized Schur decomposition of a complex
     !! matrix pair (A,B), using an unitary equivalence transformation
     !! (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with
     !! row index IFST is moved to row ILST.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               info )
        ! -- lapack computational routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(in) :: ifst, lda, ldb, ldq, ldz, n
           integer(${ik}$), intent(inout) :: ilst
           integer(${ik}$), intent(out) :: info
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Local Scalars 
           integer(${ik}$) :: here
           ! Intrinsic Functions 
           ! Executable Statements 
           ! decode and test input arguments.
           info = 0_${ik}$
           if( n<0_${ik}$ ) then
              info = -3_${ik}$
           else if( lda<max( 1_${ik}$, n ) ) then
              info = -5_${ik}$
           else if( ldb<max( 1_${ik}$, n ) ) then
              info = -7_${ik}$
           else if( ldq<1_${ik}$ .or. wantq .and. ( ldq<max( 1_${ik}$, n ) ) ) then
              info = -9_${ik}$
           else if( ldz<1_${ik}$ .or. wantz .and. ( ldz<max( 1_${ik}$, n ) ) ) then
              info = -11_${ik}$
           else if( ifst<1_${ik}$ .or. ifst>n ) then
              info = -12_${ik}$
           else if( ilst<1_${ik}$ .or. ilst>n ) then
              info = -13_${ik}$
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'ZTGEXC', -info )
              return
           end if
           ! quick return if possible
           if( n<=1 )return
           if( ifst==ilst )return
           if( ifst<ilst ) then
              here = ifst
              10 continue
              ! swap with next one below
              call stdlib${ii}$_${ci}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here + 1_${ik}$
              if( here<ilst )go to 10
              here = here - 1_${ik}$
           else
              here = ifst - 1_${ik}$
              20 continue
              ! swap with next one above
              call stdlib${ii}$_${ci}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z, ldz,here, info )
                        
              if( info/=0_${ik}$ ) then
                 ilst = here
                 return
              end if
              here = here - 1_${ik}$
              if( here>=ilst )go to 20
              here = here + 1_${ik}$
           end if
           ilst = here
           return
     end subroutine stdlib${ii}$_${ci}$tgexc

#:endif
#:endfor



     pure module subroutine stdlib${ii}$_stgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, &
     !! STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)
     !! of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair
     !! (A, B) by an orthogonal equivalence transformation.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(sp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_scopy by calls to stdlib${ii}$_slaset, or by do
        ! loops. sven hammarling, 1/5/02.
           ! Parameters 
           real(sp), parameter :: twenty = 2.0e+01_sp
           integer(${ik}$), parameter :: ldst = 4_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, idum, linfo, m
           real(sp) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, &
                     scale, smlnum, thresha, threshb
           ! Local Arrays 
           integer(${ik}$) :: iwork(ldst)
           real(sp) :: ai(2_${ik}$), ar(2_${ik}$), be(2_${ik}$), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(&
           ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(&
                     ldst,ldst)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 .or. n1<=0 .or. n2<=0 )return
           if( n1>n .or. ( j1+n1 )>n )return
           m = n1 + n2
           if( lwork<max( n*m, m*m*2_${ik}$ ) ) then
              info = -16_${ik}$
              work( 1_${ik}$ ) = max( n*m, m*m*2_${ik}$ )
              return
           end if
           weak = .false.
           strong = .false.
           ! make a local copy of selected block
           call stdlib${ii}$_slaset( 'FULL', ldst, ldst, zero, zero, li, ldst )
           call stdlib${ii}$_slaset( 'FULL', ldst, ldst, zero, zero, ir, ldst )
           call stdlib${ii}$_slacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_slacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute threshold for testing acceptance of swapping.
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           dscale = zero
           dsum = one
           call stdlib${ii}$_slacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_slassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnorma = dscale*sqrt( dsum )
           dscale = zero
           dsum = one
           call stdlib${ii}$_slacpy( 'FULL', m, m, t, ldst, work, m )
           call stdlib${ii}$_slassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnormb = dscale*sqrt( dsum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*dnorma, smlnum )
           threshb = max( twenty*eps*dnormb, smlnum )
           if( m==2_${ik}$ ) then
              ! case 1: swap 1-by-1 and 1-by-1 blocks.
              ! compute orthogonal ql and rq that swap 1-by-1 and 1-by-1 blocks
              ! using givens rotations and perform the swap tentatively.
              f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
              g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
              sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
              sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
              call stdlib${ii}$_slartg( f, g, ir( 1_${ik}$, 2_${ik}$ ), ir( 1_${ik}$, 1_${ik}$ ), ddum )
              ir( 2_${ik}$, 1_${ik}$ ) = -ir( 1_${ik}$, 2_${ik}$ )
              ir( 2_${ik}$, 2_${ik}$ ) = ir( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_srot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_srot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              if( sa>=sb ) then
                 call stdlib${ii}$_slartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              else
                 call stdlib${ii}$_slartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              end if
              call stdlib${ii}$_srot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_srot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              li( 2_${ik}$, 2_${ik}$ ) = li( 1_${ik}$, 1_${ik}$ )
              li( 1_${ik}$, 2_${ik}$ ) = -li( 2_${ik}$, 1_${ik}$ )
              ! weak stability test: |s21| <= o(eps f-norm((a)))
                                 ! and  |t21| <= o(eps f-norm((b)))
              weak = abs( s( 2_${ik}$, 1_${ik}$ ) ) <= thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) ) <= threshb
              if( .not.weak )go to 70
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_slacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_sgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_slassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_slacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_sgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_slassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                     ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              call stdlib${ii}$_srot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_srot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_srot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              call stdlib${ii}$_srot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! set  n1-by-n2 (2,1) - blocks to zero.
              a( j1+1, j1 ) = zero
              b( j1+1, j1 ) = zero
              ! accumulate transformations into q and z if requested.
              if( wantz )call stdlib${ii}$_srot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ &
                        ) )
              if( wantq )call stdlib${ii}$_srot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! exit with info = 0 if swap was successfully performed.
              return
           else
              ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2
                      ! and 2-by-2 blocks.
              ! solve the generalized sylvester equation
                       ! s11 * r - l * s22 = scale * s12
                       ! t11 * r - l * t22 = scale * t12
              ! for r and l. solutions in li and ir.
              call stdlib${ii}$_slacpy( 'FULL', n1, n2, t( 1_${ik}$, n1+1 ), ldst, li, ldst )
              call stdlib${ii}$_slacpy( 'FULL', n1, n2, s( 1_${ik}$, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst )
                        
              call stdlib${ii}$_stgsy2( 'N', 0_${ik}$, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),&
               ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,&
                         linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix ql:
                          ! ql**t * li = [ tl ]
                                       ! [ 0  ]
              ! where
                          ! li =  [      -l              ]
                                ! [ scale * identity(n2) ]
              do i = 1, n2
                 call stdlib${ii}$_sscal( n1, -one, li( 1_${ik}$, i ), 1_${ik}$ )
                 li( n1+i, i ) = scale
              end do
              call stdlib${ii}$_sgeqr2( m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_sorg2r( m, m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix rq:
                          ! ir * rq**t =   [ 0  tr],
               ! where ir = [ scale * identity(n1), r ]
              do i = 1, n1
                 ir( n2+i, i ) = scale
              end do
              call stdlib${ii}$_sgerq2( n1, m, ir( n2+1, 1_${ik}$ ), ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_sorgr2( m, m, n1, ir, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              ! perform the swapping tentatively:
              call stdlib${ii}$_sgemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
              call stdlib${ii}$_sgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst )
              call stdlib${ii}$_sgemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_sgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst )
              call stdlib${ii}$_slacpy( 'F', m, m, s, ldst, scpy, ldst )
              call stdlib${ii}$_slacpy( 'F', m, m, t, ldst, tcpy, ldst )
              call stdlib${ii}$_slacpy( 'F', m, m, ir, ldst, ircop, ldst )
              call stdlib${ii}$_slacpy( 'F', m, m, li, ldst, licop, ldst )
              ! triangularize the b-part by an rq factorization.
              ! apply transformation (from left) to a-part, giving s.
              call stdlib${ii}$_sgerq2( m, m, t, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_sormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_sormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo )
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in brqa21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_slassq( n1, s( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              brqa21 = dscale*sqrt( dsum )
              ! triangularize the b-part by a qr factorization.
              ! apply transformation (from right) to a-part, giving s.
              call stdlib${ii}$_sgeqr2( m, m, tcpy, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_sorm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info )
                        
              call stdlib${ii}$_sorm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info )
                        
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in bqra21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_slassq( n1, scpy( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              bqra21 = dscale*sqrt( dsum )
              ! decide which method to use.
                ! weak stability test:
                   ! f-norm(s21) <= o(eps * f-norm((s)))
              if( bqra21<=brqa21 .and. bqra21<=thresha ) then
                 call stdlib${ii}$_slacpy( 'F', m, m, scpy, ldst, s, ldst )
                 call stdlib${ii}$_slacpy( 'F', m, m, tcpy, ldst, t, ldst )
                 call stdlib${ii}$_slacpy( 'F', m, m, ircop, ldst, ir, ldst )
                 call stdlib${ii}$_slacpy( 'F', m, m, licop, ldst, li, ldst )
              else if( brqa21>=thresha ) then
                 go to 70
              end if
              ! set lower triangle of b-part to zero
              if (m>1_${ik}$) call stdlib${ii}$_slaset( 'LOWER', m-1, m-1, zero, zero, t(2_${ik}$,1_${ik}$), ldst )
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_slacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_slassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_slacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_slassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! if the swap is accepted ("weakly" and "strongly"), apply the
              ! transformations and set n1-by-n2 (2,1)-block to zero.
              call stdlib${ii}$_slaset( 'FULL', n1, n2, zero, zero, s(n2+1,1_${ik}$), ldst )
              ! copy back m-by-m diagonal block starting at index j1 of (a, b)
              call stdlib${ii}$_slacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda )
              call stdlib${ii}$_slacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb )
              call stdlib${ii}$_slaset( 'FULL', ldst, ldst, zero, zero, t, ldst )
              ! standardize existing 2-by-2 blocks.
              call stdlib${ii}$_slaset( 'FULL', m, m, zero, zero, work, m )
              work( 1_${ik}$ ) = one
              t( 1_${ik}$, 1_${ik}$ ) = one
              idum = lwork - m*m - 2_${ik}$
              if( n2>1_${ik}$ ) then
                 call stdlib${ii}$_slagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1_${ik}$ ), &
                           work( 2_${ik}$ ), t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ) )
                 work( m+1 ) = -work( 2_${ik}$ )
                 work( m+2 ) = work( 1_${ik}$ )
                 t( n2, n2 ) = t( 1_${ik}$, 1_${ik}$ )
                 t( 1_${ik}$, 2_${ik}$ ) = -t( 2_${ik}$, 1_${ik}$ )
              end if
              work( m*m ) = one
              t( m, m ) = one
              if( n1>1_${ik}$ ) then
                 call stdlib${ii}$_slagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, &
                 work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) )
                           
                 work( m*m ) = work( n2*m+n2+1 )
                 work( m*m-1 ) = -work( n2*m+n2+2 )
                 t( m, m ) = t( n2+1, n2+1 )
                 t( m-1, m ) = -t( m, m-1 )
              end if
              call stdlib${ii}$_sgemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_slacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda )
              call stdlib${ii}$_sgemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_slacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb )
              call stdlib${ii}$_sgemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m &
                        )
              call stdlib${ii}$_slacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst )
              call stdlib${ii}$_sgemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_slacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda )
              call stdlib${ii}$_sgemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_slacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb )
              call stdlib${ii}$_sgemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_slacpy( 'FULL', m, m, work, m, ir, ldst )
              ! accumulate transformations into q and z if requested.
              if( wantq ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', n, m, m, one, q( 1_${ik}$, j1 ), ldq, li,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_slacpy( 'FULL', n, m, work, n, q( 1_${ik}$, j1 ), ldq )
              end if
              if( wantz ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', n, m, m, one, z( 1_${ik}$, j1 ), ldz, ir,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_slacpy( 'FULL', n, m, work, n, z( 1_${ik}$, j1 ), ldz )
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                      ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              i = j1 + m
              if( i<=n ) then
                 call stdlib${ii}$_sgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, &
                           work, m )
                 call stdlib${ii}$_slacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda )
                 call stdlib${ii}$_sgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, &
                           work, m )
                 call stdlib${ii}$_slacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb )
              end if
              i = j1 - 1_${ik}$
              if( i>0_${ik}$ ) then
                 call stdlib${ii}$_sgemm( 'N', 'N', i, m, m, one, a( 1_${ik}$, j1 ), lda, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_slacpy( 'FULL', i, m, work, i, a( 1_${ik}$, j1 ), lda )
                 call stdlib${ii}$_sgemm( 'N', 'N', i, m, m, one, b( 1_${ik}$, j1 ), ldb, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_slacpy( 'FULL', i, m, work, i, b( 1_${ik}$, j1 ), ldb )
              end if
              ! exit with info = 0 if swap was successfully performed.
              return
           end if
           ! exit with info = 1 if swap was rejected.
           70 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_stgex2

     pure module subroutine stdlib${ii}$_dtgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, &
     !! DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)
     !! of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair
     !! (A, B) by an orthogonal equivalence transformation.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(dp), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_dcopy by calls to stdlib${ii}$_dlaset, or by do
        ! loops. sven hammarling, 1/5/02.
           ! Parameters 
           real(dp), parameter :: twenty = 2.0e+01_dp
           integer(${ik}$), parameter :: ldst = 4_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, idum, linfo, m
           real(dp) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, &
                     scale, smlnum, thresha, threshb
           ! Local Arrays 
           integer(${ik}$) :: iwork(ldst)
           real(dp) :: ai(2_${ik}$), ar(2_${ik}$), be(2_${ik}$), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(&
           ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(&
                     ldst,ldst)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 .or. n1<=0 .or. n2<=0 )return
           if( n1>n .or. ( j1+n1 )>n )return
           m = n1 + n2
           if( lwork<max( 1_${ik}$, n*m, m*m*2_${ik}$ ) ) then
              info = -16_${ik}$
              work( 1_${ik}$ ) = max( 1_${ik}$, n*m, m*m*2_${ik}$ )
              return
           end if
           weak = .false.
           strong = .false.
           ! make a local copy of selected block
           call stdlib${ii}$_dlaset( 'FULL', ldst, ldst, zero, zero, li, ldst )
           call stdlib${ii}$_dlaset( 'FULL', ldst, ldst, zero, zero, ir, ldst )
           call stdlib${ii}$_dlacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_dlacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute threshold for testing acceptance of swapping.
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           dscale = zero
           dsum = one
           call stdlib${ii}$_dlacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_dlassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnorma = dscale*sqrt( dsum )
           dscale = zero
           dsum = one
           call stdlib${ii}$_dlacpy( 'FULL', m, m, t, ldst, work, m )
           call stdlib${ii}$_dlassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnormb = dscale*sqrt( dsum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*dnorma, smlnum )
           threshb = max( twenty*eps*dnormb, smlnum )
           if( m==2_${ik}$ ) then
              ! case 1: swap 1-by-1 and 1-by-1 blocks.
              ! compute orthogonal ql and rq that swap 1-by-1 and 1-by-1 blocks
              ! using givens rotations and perform the swap tentatively.
              f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
              g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
              sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
              sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
              call stdlib${ii}$_dlartg( f, g, ir( 1_${ik}$, 2_${ik}$ ), ir( 1_${ik}$, 1_${ik}$ ), ddum )
              ir( 2_${ik}$, 1_${ik}$ ) = -ir( 1_${ik}$, 2_${ik}$ )
              ir( 2_${ik}$, 2_${ik}$ ) = ir( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_drot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_drot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              if( sa>=sb ) then
                 call stdlib${ii}$_dlartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              else
                 call stdlib${ii}$_dlartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              end if
              call stdlib${ii}$_drot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_drot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              li( 2_${ik}$, 2_${ik}$ ) = li( 1_${ik}$, 1_${ik}$ )
              li( 1_${ik}$, 2_${ik}$ ) = -li( 2_${ik}$, 1_${ik}$ )
              ! weak stability test: |s21| <= o(eps f-norm((a)))
                                 ! and  |t21| <= o(eps f-norm((b)))
              weak = abs( s( 2_${ik}$, 1_${ik}$ ) ) <= thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) ) <= threshb
              if( .not.weak )go to 70
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_dlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_dgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_dlassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_dlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_dgemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_dlassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                     ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              call stdlib${ii}$_drot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_drot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_drot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              call stdlib${ii}$_drot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! set  n1-by-n2 (2,1) - blocks to zero.
              a( j1+1, j1 ) = zero
              b( j1+1, j1 ) = zero
              ! accumulate transformations into q and z if requested.
              if( wantz )call stdlib${ii}$_drot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ &
                        ) )
              if( wantq )call stdlib${ii}$_drot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! exit with info = 0 if swap was successfully performed.
              return
           else
              ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2
                      ! and 2-by-2 blocks.
              ! solve the generalized sylvester equation
                       ! s11 * r - l * s22 = scale * s12
                       ! t11 * r - l * t22 = scale * t12
              ! for r and l. solutions in li and ir.
              call stdlib${ii}$_dlacpy( 'FULL', n1, n2, t( 1_${ik}$, n1+1 ), ldst, li, ldst )
              call stdlib${ii}$_dlacpy( 'FULL', n1, n2, s( 1_${ik}$, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst )
                        
              call stdlib${ii}$_dtgsy2( 'N', 0_${ik}$, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),&
               ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,&
                         linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix ql:
                          ! ql**t * li = [ tl ]
                                       ! [ 0  ]
              ! where
                          ! li =  [      -l              ]
                                ! [ scale * identity(n2) ]
              do i = 1, n2
                 call stdlib${ii}$_dscal( n1, -one, li( 1_${ik}$, i ), 1_${ik}$ )
                 li( n1+i, i ) = scale
              end do
              call stdlib${ii}$_dgeqr2( m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_dorg2r( m, m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix rq:
                          ! ir * rq**t =   [ 0  tr],
               ! where ir = [ scale * identity(n1), r ]
              do i = 1, n1
                 ir( n2+i, i ) = scale
              end do
              call stdlib${ii}$_dgerq2( n1, m, ir( n2+1, 1_${ik}$ ), ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_dorgr2( m, m, n1, ir, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              ! perform the swapping tentatively:
              call stdlib${ii}$_dgemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
              call stdlib${ii}$_dgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst )
              call stdlib${ii}$_dgemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_dgemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst )
              call stdlib${ii}$_dlacpy( 'F', m, m, s, ldst, scpy, ldst )
              call stdlib${ii}$_dlacpy( 'F', m, m, t, ldst, tcpy, ldst )
              call stdlib${ii}$_dlacpy( 'F', m, m, ir, ldst, ircop, ldst )
              call stdlib${ii}$_dlacpy( 'F', m, m, li, ldst, licop, ldst )
              ! triangularize the b-part by an rq factorization.
              ! apply transformation (from left) to a-part, giving s.
              call stdlib${ii}$_dgerq2( m, m, t, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_dormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_dormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo )
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in brqa21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_dlassq( n1, s( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              brqa21 = dscale*sqrt( dsum )
              ! triangularize the b-part by a qr factorization.
              ! apply transformation (from right) to a-part, giving s.
              call stdlib${ii}$_dgeqr2( m, m, tcpy, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_dorm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info )
                        
              call stdlib${ii}$_dorm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info )
                        
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in bqra21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_dlassq( n1, scpy( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              bqra21 = dscale*sqrt( dsum )
              ! decide which method to use.
                ! weak stability test:
                   ! f-norm(s21) <= o(eps * f-norm((s)))
              if( bqra21<=brqa21 .and. bqra21<=thresha ) then
                 call stdlib${ii}$_dlacpy( 'F', m, m, scpy, ldst, s, ldst )
                 call stdlib${ii}$_dlacpy( 'F', m, m, tcpy, ldst, t, ldst )
                 call stdlib${ii}$_dlacpy( 'F', m, m, ircop, ldst, ir, ldst )
                 call stdlib${ii}$_dlacpy( 'F', m, m, licop, ldst, li, ldst )
              else if( brqa21>=thresha ) then
                 go to 70
              end if
              ! set lower triangle of b-part to zero
              call stdlib${ii}$_dlaset( 'LOWER', m-1, m-1, zero, zero, t(2_${ik}$,1_${ik}$), ldst )
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_dlacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_dlassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_dlacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_dlassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! if the swap is accepted ("weakly" and "strongly"), apply the
              ! transformations and set n1-by-n2 (2,1)-block to zero.
              call stdlib${ii}$_dlaset( 'FULL', n1, n2, zero, zero, s(n2+1,1_${ik}$), ldst )
              ! copy back m-by-m diagonal block starting at index j1 of (a, b)
              call stdlib${ii}$_dlacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda )
              call stdlib${ii}$_dlacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb )
              call stdlib${ii}$_dlaset( 'FULL', ldst, ldst, zero, zero, t, ldst )
              ! standardize existing 2-by-2 blocks.
              call stdlib${ii}$_dlaset( 'FULL', m, m, zero, zero, work, m )
              work( 1_${ik}$ ) = one
              t( 1_${ik}$, 1_${ik}$ ) = one
              idum = lwork - m*m - 2_${ik}$
              if( n2>1_${ik}$ ) then
                 call stdlib${ii}$_dlagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1_${ik}$ ), &
                           work( 2_${ik}$ ), t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ) )
                 work( m+1 ) = -work( 2_${ik}$ )
                 work( m+2 ) = work( 1_${ik}$ )
                 t( n2, n2 ) = t( 1_${ik}$, 1_${ik}$ )
                 t( 1_${ik}$, 2_${ik}$ ) = -t( 2_${ik}$, 1_${ik}$ )
              end if
              work( m*m ) = one
              t( m, m ) = one
              if( n1>1_${ik}$ ) then
                 call stdlib${ii}$_dlagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, &
                 work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) )
                           
                 work( m*m ) = work( n2*m+n2+1 )
                 work( m*m-1 ) = -work( n2*m+n2+2 )
                 t( m, m ) = t( n2+1, n2+1 )
                 t( m-1, m ) = -t( m, m-1 )
              end if
              call stdlib${ii}$_dgemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_dlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda )
              call stdlib${ii}$_dgemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_dlacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb )
              call stdlib${ii}$_dgemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m &
                        )
              call stdlib${ii}$_dlacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst )
              call stdlib${ii}$_dgemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_dlacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda )
              call stdlib${ii}$_dgemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_dlacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb )
              call stdlib${ii}$_dgemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_dlacpy( 'FULL', m, m, work, m, ir, ldst )
              ! accumulate transformations into q and z if requested.
              if( wantq ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', n, m, m, one, q( 1_${ik}$, j1 ), ldq, li,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_dlacpy( 'FULL', n, m, work, n, q( 1_${ik}$, j1 ), ldq )
              end if
              if( wantz ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', n, m, m, one, z( 1_${ik}$, j1 ), ldz, ir,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_dlacpy( 'FULL', n, m, work, n, z( 1_${ik}$, j1 ), ldz )
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                      ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              i = j1 + m
              if( i<=n ) then
                 call stdlib${ii}$_dgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, &
                           work, m )
                 call stdlib${ii}$_dlacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda )
                 call stdlib${ii}$_dgemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, &
                           work, m )
                 call stdlib${ii}$_dlacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb )
              end if
              i = j1 - 1_${ik}$
              if( i>0_${ik}$ ) then
                 call stdlib${ii}$_dgemm( 'N', 'N', i, m, m, one, a( 1_${ik}$, j1 ), lda, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_dlacpy( 'FULL', i, m, work, i, a( 1_${ik}$, j1 ), lda )
                 call stdlib${ii}$_dgemm( 'N', 'N', i, m, m, one, b( 1_${ik}$, j1 ), ldb, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_dlacpy( 'FULL', i, m, work, i, b( 1_${ik}$, j1 ), ldb )
              end if
              ! exit with info = 0 if swap was successfully performed.
              return
           end if
           ! exit with info = 1 if swap was rejected.
           70 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_dtgex2

#:for rk,rt,ri in REAL_KINDS_TYPES
#:if not rk in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ri}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, n1, n2, &
     !! DTGEX2: swaps adjacent diagonal blocks (A11, B11) and (A22, B22)
     !! of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair
     !! (A, B) by an orthogonal equivalence transformation.
     !! (A, B) must be in generalized real Schur canonical form (as returned
     !! by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
     !! diagonal blocks. B is upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T
     !! Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T
               work, lwork, info )
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${rk}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, lwork, n, n1, n2
           ! Array Arguments 
           real(${rk}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
           real(${rk}$), intent(out) :: work(*)
        ! =====================================================================
        ! replaced various illegal calls to stdlib${ii}$_${ri}$copy by calls to stdlib${ii}$_${ri}$laset, or by do
        ! loops. sven hammarling, 1/5/02.
           ! Parameters 
           real(${rk}$), parameter :: twenty = 2.0e+01_${rk}$
           integer(${ik}$), parameter :: ldst = 4_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, idum, linfo, m
           real(${rk}$) :: bqra21, brqa21, ddum, dnorma, dnormb, dscale, dsum, eps, f, g, sa, sb, &
                     scale, smlnum, thresha, threshb
           ! Local Arrays 
           integer(${ik}$) :: iwork(ldst)
           real(${rk}$) :: ai(2_${ik}$), ar(2_${ik}$), be(2_${ik}$), ir(ldst,ldst), ircop(ldst,ldst), li(ldst,ldst), licop(&
           ldst,ldst), s(ldst,ldst), scpy(ldst,ldst), t(ldst,ldst), taul(ldst), taur(ldst), tcpy(&
                     ldst,ldst)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 .or. n1<=0 .or. n2<=0 )return
           if( n1>n .or. ( j1+n1 )>n )return
           m = n1 + n2
           if( lwork<max( 1_${ik}$, n*m, m*m*2_${ik}$ ) ) then
              info = -16_${ik}$
              work( 1_${ik}$ ) = max( 1_${ik}$, n*m, m*m*2_${ik}$ )
              return
           end if
           weak = .false.
           strong = .false.
           ! make a local copy of selected block
           call stdlib${ii}$_${ri}$laset( 'FULL', ldst, ldst, zero, zero, li, ldst )
           call stdlib${ii}$_${ri}$laset( 'FULL', ldst, ldst, zero, zero, ir, ldst )
           call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute threshold for testing acceptance of swapping.
           eps = stdlib${ii}$_${ri}$lamch( 'P' )
           smlnum = stdlib${ii}$_${ri}$lamch( 'S' ) / eps
           dscale = zero
           dsum = one
           call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_${ri}$lassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnorma = dscale*sqrt( dsum )
           dscale = zero
           dsum = one
           call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, t, ldst, work, m )
           call stdlib${ii}$_${ri}$lassq( m*m, work, 1_${ik}$, dscale, dsum )
           dnormb = dscale*sqrt( dsum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*dnorma, smlnum )
           threshb = max( twenty*eps*dnormb, smlnum )
           if( m==2_${ik}$ ) then
              ! case 1: swap 1-by-1 and 1-by-1 blocks.
              ! compute orthogonal ql and rq that swap 1-by-1 and 1-by-1 blocks
              ! using givens rotations and perform the swap tentatively.
              f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
              g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
              sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
              sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
              call stdlib${ii}$_${ri}$lartg( f, g, ir( 1_${ik}$, 2_${ik}$ ), ir( 1_${ik}$, 1_${ik}$ ), ddum )
              ir( 2_${ik}$, 1_${ik}$ ) = -ir( 1_${ik}$, 2_${ik}$ )
              ir( 2_${ik}$, 2_${ik}$ ) = ir( 1_${ik}$, 1_${ik}$ )
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
              if( sa>=sb ) then
                 call stdlib${ii}$_${ri}$lartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              else
                 call stdlib${ii}$_${ri}$lartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ ),ddum )
              end if
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_${ri}$rot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ ) )
                        
              li( 2_${ik}$, 2_${ik}$ ) = li( 1_${ik}$, 1_${ik}$ )
              li( 1_${ik}$, 2_${ik}$ ) = -li( 2_${ik}$, 1_${ik}$ )
              ! weak stability test: |s21| <= o(eps f-norm((a)))
                                 ! and  |t21| <= o(eps f-norm((b)))
              weak = abs( s( 2_${ik}$, 1_${ik}$ ) ) <= thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) ) <= threshb
              if( .not.weak )go to 70
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_${ri}$lassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_${ri}$lassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                     ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              call stdlib${ii}$_${ri}$rot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_${ri}$rot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ ) )
                        
              call stdlib${ii}$_${ri}$rot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              call stdlib${ii}$_${ri}$rot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb,li( 1_${ik}$, 1_${ik}$ ), li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! set  n1-by-n2 (2,1) - blocks to zero.
              a( j1+1, j1 ) = zero
              b( j1+1, j1 ) = zero
              ! accumulate transformations into q and z if requested.
              if( wantz )call stdlib${ii}$_${ri}$rot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, ir( 1_${ik}$, 1_${ik}$ ),ir( 2_${ik}$, 1_${ik}$ &
                        ) )
              if( wantq )call stdlib${ii}$_${ri}$rot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, li( 1_${ik}$, 1_${ik}$ ),li( 2_${ik}$, 1_${ik}$ &
                        ) )
              ! exit with info = 0 if swap was successfully performed.
              return
           else
              ! case 2: swap 1-by-1 and 2-by-2 blocks, or 2-by-2
                      ! and 2-by-2 blocks.
              ! solve the generalized sylvester equation
                       ! s11 * r - l * s22 = scale * s12
                       ! t11 * r - l * t22 = scale * t12
              ! for r and l. solutions in li and ir.
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n1, n2, t( 1_${ik}$, n1+1 ), ldst, li, ldst )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n1, n2, s( 1_${ik}$, n1+1 ), ldst,ir( n2+1, n1+1 ), ldst )
                        
              call stdlib${ii}$_${ri}$tgsy2( 'N', 0_${ik}$, n1, n2, s, ldst, s( n1+1, n1+1 ), ldst,ir( n2+1, n1+1 ),&
               ldst, t, ldst, t( n1+1, n1+1 ),ldst, li, ldst, scale, dsum, dscale, iwork, idum,&
                         linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix ql:
                          ! ql**t * li = [ tl ]
                                       ! [ 0  ]
              ! where
                          ! li =  [      -l              ]
                                ! [ scale * identity(n2) ]
              do i = 1, n2
                 call stdlib${ii}$_${ri}$scal( n1, -one, li( 1_${ik}$, i ), 1_${ik}$ )
                 li( n1+i, i ) = scale
              end do
              call stdlib${ii}$_${ri}$geqr2( m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_${ri}$org2r( m, m, n2, li, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              ! compute orthogonal matrix rq:
                          ! ir * rq**t =   [ 0  tr],
               ! where ir = [ scale * identity(n1), r ]
              do i = 1, n1
                 ir( n2+i, i ) = scale
              end do
              call stdlib${ii}$_${ri}$gerq2( n1, m, ir( n2+1, 1_${ik}$ ), ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_${ri}$orgr2( m, m, n1, ir, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              ! perform the swapping tentatively:
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, s,ldst )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_${ri}$gemm( 'N', 'T', m, m, m, one, work, m, ir, ldst, zero, t,ldst )
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, s, ldst, scpy, ldst )
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, t, ldst, tcpy, ldst )
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, ir, ldst, ircop, ldst )
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, li, ldst, licop, ldst )
              ! triangularize the b-part by an rq factorization.
              ! apply transformation (from left) to a-part, giving s.
              call stdlib${ii}$_${ri}$gerq2( m, m, t, ldst, taur, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_${ri}$ormr2( 'R', 'T', m, m, m, t, ldst, taur, s, ldst, work,linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_${ri}$ormr2( 'L', 'N', m, m, m, t, ldst, taur, ir, ldst, work,linfo )
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in brqa21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_${ri}$lassq( n1, s( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              brqa21 = dscale*sqrt( dsum )
              ! triangularize the b-part by a qr factorization.
              ! apply transformation (from right) to a-part, giving s.
              call stdlib${ii}$_${ri}$geqr2( m, m, tcpy, ldst, taul, work, linfo )
              if( linfo/=0 )go to 70
              call stdlib${ii}$_${ri}$orm2r( 'L', 'T', m, m, m, tcpy, ldst, taul, scpy, ldst,work, info )
                        
              call stdlib${ii}$_${ri}$orm2r( 'R', 'N', m, m, m, tcpy, ldst, taul, licop, ldst,work, info )
                        
              if( linfo/=0 )go to 70
              ! compute f-norm(s21) in bqra21. (t21 is 0.)
              dscale = zero
              dsum = one
              do i = 1, n2
                 call stdlib${ii}$_${ri}$lassq( n1, scpy( n2+1, i ), 1_${ik}$, dscale, dsum )
              end do
              bqra21 = dscale*sqrt( dsum )
              ! decide which method to use.
                ! weak stability test:
                   ! f-norm(s21) <= o(eps * f-norm((s)))
              if( bqra21<=brqa21 .and. bqra21<=thresha ) then
                 call stdlib${ii}$_${ri}$lacpy( 'F', m, m, scpy, ldst, s, ldst )
                 call stdlib${ii}$_${ri}$lacpy( 'F', m, m, tcpy, ldst, t, ldst )
                 call stdlib${ii}$_${ri}$lacpy( 'F', m, m, ircop, ldst, ir, ldst )
                 call stdlib${ii}$_${ri}$lacpy( 'F', m, m, licop, ldst, li, ldst )
              else if( brqa21>=thresha ) then
                 go to 70
              end if
              ! set lower triangle of b-part to zero
              if (m>1_${ik}$) call stdlib${ii}$_${ri}$laset( 'LOWER', m-1, m-1, zero, zero, t(2_${ik}$,1_${ik}$), ldst )
              if( wands ) then
                 ! strong stability test:
                     ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                     ! and
                     ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, a( j1, j1 ), lda, work( m*m+1 ),m )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, one, li, ldst, s, ldst, zero,work, m )
                           
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_${ri}$lassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sa = dscale*sqrt( dsum )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, b( j1, j1 ), ldb, work( m*m+1 ),m )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, one, li, ldst, t, ldst, zero,work, m )
                           
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, -one, work, m, ir, ldst, one,work( m*m+1 ),&
                            m )
                 dscale = zero
                 dsum = one
                 call stdlib${ii}$_${ri}$lassq( m*m, work( m*m+1 ), 1_${ik}$, dscale, dsum )
                 sb = dscale*sqrt( dsum )
                 strong = sa<=thresha .and. sb<=threshb
                 if( .not.strong )go to 70
              end if
              ! if the swap is accepted ("weakly" and "strongly"), apply the
              ! transformations and set n1-by-n2 (2,1)-block to zero.
              call stdlib${ii}$_${ri}$laset( 'FULL', n1, n2, zero, zero, s(n2+1,1_${ik}$), ldst )
              ! copy back m-by-m diagonal block starting at index j1 of (a, b)
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, s, ldst, a( j1, j1 ), lda )
              call stdlib${ii}$_${ri}$lacpy( 'F', m, m, t, ldst, b( j1, j1 ), ldb )
              call stdlib${ii}$_${ri}$laset( 'FULL', ldst, ldst, zero, zero, t, ldst )
              ! standardize existing 2-by-2 blocks.
              call stdlib${ii}$_${ri}$laset( 'FULL', m, m, zero, zero, work, m )
              work( 1_${ik}$ ) = one
              t( 1_${ik}$, 1_${ik}$ ) = one
              idum = lwork - m*m - 2_${ik}$
              if( n2>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$lagv2( a( j1, j1 ), lda, b( j1, j1 ), ldb, ar, ai, be,work( 1_${ik}$ ), &
                           work( 2_${ik}$ ), t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ) )
                 work( m+1 ) = -work( 2_${ik}$ )
                 work( m+2 ) = work( 1_${ik}$ )
                 t( n2, n2 ) = t( 1_${ik}$, 1_${ik}$ )
                 t( 1_${ik}$, 2_${ik}$ ) = -t( 2_${ik}$, 1_${ik}$ )
              end if
              work( m*m ) = one
              t( m, m ) = one
              if( n1>1_${ik}$ ) then
                 call stdlib${ii}$_${ri}$lagv2( a( j1+n2, j1+n2 ), lda, b( j1+n2, j1+n2 ), ldb,taur, taul, &
                 work( m*m+1 ), work( n2*m+n2+1 ),work( n2*m+n2+2 ), t( n2+1, n2+1 ),t( m, m-1 ) )
                           
                 work( m*m ) = work( n2*m+n2+1 )
                 work( m*m-1 ) = -work( n2*m+n2+2 )
                 t( m, m ) = t( n2+1, n2+1 )
                 t( m-1, m ) = -t( m, m-1 )
              end if
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n2, n1, n2, one, work, m, a( j1, j1+n2 ),lda, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n2, n1, work( m*m+1 ), n2, a( j1, j1+n2 ),lda )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', n2, n1, n2, one, work, m, b( j1, j1+n2 ),ldb, zero, &
                        work( m*m+1 ), n2 )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n2, n1, work( m*m+1 ), n2, b( j1, j1+n2 ),ldb )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', m, m, m, one, li, ldst, work, m, zero,work( m*m+1 ), m &
                        )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, work( m*m+1 ), m, li, ldst )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n2, n1, n1, one, a( j1, j1+n2 ), lda,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n2, n1, work, n2, a( j1, j1+n2 ), lda )
              call stdlib${ii}$_${ri}$gemm( 'N', 'N', n2, n1, n1, one, b( j1, j1+n2 ), ldb,t( n2+1, n2+1 ), &
                        ldst, zero, work, n2 )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', n2, n1, work, n2, b( j1, j1+n2 ), ldb )
              call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, m, m, one, ir, ldst, t, ldst, zero,work, m )
              call stdlib${ii}$_${ri}$lacpy( 'FULL', m, m, work, m, ir, ldst )
              ! accumulate transformations into q and z if requested.
              if( wantq ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, m, m, one, q( 1_${ik}$, j1 ), ldq, li,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', n, m, work, n, q( 1_${ik}$, j1 ), ldq )
              end if
              if( wantz ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', n, m, m, one, z( 1_${ik}$, j1 ), ldz, ir,ldst, zero, work, &
                           n )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', n, m, work, n, z( 1_${ik}$, j1 ), ldz )
              end if
              ! update (a(j1:j1+m-1, m+j1:n), b(j1:j1+m-1, m+j1:n)) and
                      ! (a(1:j1-1, j1:j1+m), b(1:j1-1, j1:j1+m)).
              i = j1 + m
              if( i<=n ) then
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, n-i+1, m, one, li, ldst,a( j1, i ), lda, zero, &
                           work, m )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, n-i+1, work, m, a( j1, i ), lda )
                 call stdlib${ii}$_${ri}$gemm( 'T', 'N', m, n-i+1, m, one, li, ldst,b( j1, i ), ldb, zero, &
                           work, m )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', m, n-i+1, work, m, b( j1, i ), ldb )
              end if
              i = j1 - 1_${ik}$
              if( i>0_${ik}$ ) then
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', i, m, m, one, a( 1_${ik}$, j1 ), lda, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', i, m, work, i, a( 1_${ik}$, j1 ), lda )
                 call stdlib${ii}$_${ri}$gemm( 'N', 'N', i, m, m, one, b( 1_${ik}$, j1 ), ldb, ir,ldst, zero, work, &
                           i )
                 call stdlib${ii}$_${ri}$lacpy( 'FULL', i, m, work, i, b( 1_${ik}$, j1 ), ldb )
              end if
              ! exit with info = 0 if swap was successfully performed.
              return
           end if
           ! exit with info = 1 if swap was rejected.
           70 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_${ri}$tgex2

#:endif
#:endfor

     pure module subroutine stdlib${ii}$_ctgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info )
     !! CTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22)
     !! in an upper triangular matrix pair (A, B) by an unitary equivalence
     !! transformation.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, n
           ! Array Arguments 
           complex(sp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Parameters 
           real(sp), parameter :: twenty = 2.0e+1_sp
           integer(${ik}$), parameter :: ldst = 2_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, m
           real(sp) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb
           complex(sp) :: cdum, f, g, sq, sz
           ! Local Arrays 
           complex(sp) :: s(ldst,ldst), t(ldst,ldst), work(8_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 )return
           m = ldst
           weak = .false.
           strong = .false.
           ! make a local copy of selected block in (a, b)
           call stdlib${ii}$_clacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_clacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute the threshold for testing the acceptance of swapping.
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = stdlib${ii}$_slamch( 'S' ) / eps
           scale = real( czero,KIND=sp)
           sum = real( cone,KIND=sp)
           call stdlib${ii}$_clacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_clacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
           call stdlib${ii}$_classq( m*m, work, 1_${ik}$, scale, sum )
           sa = scale*sqrt( sum )
           scale = real( czero,KIND=sp)
           sum = real( cone,KIND=sp)
           call stdlib${ii}$_classq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
           sb = scale*sqrt( sum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*sa, smlnum )
           threshb = max( twenty*eps*sb, smlnum )
           ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks
           ! using givens rotations and perform the swap tentatively.
           f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
           g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
           sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
           sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
           call stdlib${ii}$_clartg( g, f, cz, sz, cdum )
           sz = -sz
           call stdlib${ii}$_crot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           call stdlib${ii}$_crot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           if( sa>=sb ) then
              call stdlib${ii}$_clartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           else
              call stdlib${ii}$_clartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           end if
           call stdlib${ii}$_crot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           call stdlib${ii}$_crot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           ! weak stability test: |s21| <= o(eps f-norm((a)))
                                ! and  |t21| <= o(eps f-norm((b)))
           weak = abs( s( 2_${ik}$, 1_${ik}$ ) )<=thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) )<=threshb
           if( .not.weak )go to 20
           if( wands ) then
              ! strong stability test:
                 ! f-norm((a-ql**h*s*qr, b-ql**h*t*qr)) <= o(eps*f-norm((a, b)))
              call stdlib${ii}$_clacpy( 'FULL', m, m, s, ldst, work, m )
              call stdlib${ii}$_clacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
              call stdlib${ii}$_crot( 2_${ik}$, work, 1_${ik}$, work( 3_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_crot( 2_${ik}$, work( 5_${ik}$ ), 1_${ik}$, work( 7_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_crot( 2_${ik}$, work, 2_${ik}$, work( 2_${ik}$ ), 2_${ik}$, cq, -sq )
              call stdlib${ii}$_crot( 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, work( 6_${ik}$ ), 2_${ik}$, cq, -sq )
              do i = 1, 2
                 work( i ) = work( i ) - a( j1+i-1, j1 )
                 work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 )
                 work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 )
                 work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 )
              end do
              scale = real( czero,KIND=sp)
              sum = real( cone,KIND=sp)
              call stdlib${ii}$_classq( m*m, work, 1_${ik}$, scale, sum )
              sa = scale*sqrt( sum )
              scale = real( czero,KIND=sp)
              sum = real( cone,KIND=sp)
              call stdlib${ii}$_classq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
              sb = scale*sqrt( sum )
              strong = sa<=thresha .and. sb<=threshb
              if( .not.strong )go to 20
           end if
           ! if the swap is accepted ("weakly" and "strongly"), apply the
           ! equivalence transformations to the original matrix pair (a,b)
           call stdlib${ii}$_crot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, cz, conjg( sz ) )
           call stdlib${ii}$_crot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, cz, conjg( sz ) )
           call stdlib${ii}$_crot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq )
           call stdlib${ii}$_crot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq )
           ! set  n1 by n2 (2,1) blocks to 0
           a( j1+1, j1 ) = czero
           b( j1+1, j1 ) = czero
           ! accumulate transformations into q and z if requested.
           if( wantz )call stdlib${ii}$_crot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, cz, conjg( sz ) )
                     
           if( wantq )call stdlib${ii}$_crot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, cq, conjg( sq ) )
                     
           ! exit with info = 0 if swap was successfully performed.
           return
           ! exit with info = 1 if swap was rejected.
           20 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_ctgex2

     pure module subroutine stdlib${ii}$_ztgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info )
     !! ZTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22)
     !! in an upper triangular matrix pair (A, B) by an unitary equivalence
     !! transformation.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, n
           ! Array Arguments 
           complex(dp), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Parameters 
           real(dp), parameter :: twenty = 2.0e+1_dp
           integer(${ik}$), parameter :: ldst = 2_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, m
           real(dp) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb
           complex(dp) :: cdum, f, g, sq, sz
           ! Local Arrays 
           complex(dp) :: s(ldst,ldst), t(ldst,ldst), work(8_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 )return
           m = ldst
           weak = .false.
           strong = .false.
           ! make a local copy of selected block in (a, b)
           call stdlib${ii}$_zlacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_zlacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute the threshold for testing the acceptance of swapping.
           eps = stdlib${ii}$_dlamch( 'P' )
           smlnum = stdlib${ii}$_dlamch( 'S' ) / eps
           scale = real( czero,KIND=dp)
           sum = real( cone,KIND=dp)
           call stdlib${ii}$_zlacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_zlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
           call stdlib${ii}$_zlassq( m*m, work, 1_${ik}$, scale, sum )
           sa = scale*sqrt( sum )
           scale = real( czero,KIND=dp)
           sum = real( cone,KIND=dp)
           call stdlib${ii}$_zlassq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
           sb = scale*sqrt( sum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*sa, smlnum )
           threshb = max( twenty*eps*sb, smlnum )
           ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks
           ! using givens rotations and perform the swap tentatively.
           f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
           g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
           sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
           sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
           call stdlib${ii}$_zlartg( g, f, cz, sz, cdum )
           sz = -sz
           call stdlib${ii}$_zrot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           call stdlib${ii}$_zrot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           if( sa>=sb ) then
              call stdlib${ii}$_zlartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           else
              call stdlib${ii}$_zlartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           end if
           call stdlib${ii}$_zrot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           call stdlib${ii}$_zrot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           ! weak stability test: |s21| <= o(eps f-norm((a)))
                                ! and  |t21| <= o(eps f-norm((b)))
           weak = abs( s( 2_${ik}$, 1_${ik}$ ) )<=thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) )<=threshb
           if( .not.weak )go to 20
           if( wands ) then
              ! strong stability test:
                 ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                 ! and
                 ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
              call stdlib${ii}$_zlacpy( 'FULL', m, m, s, ldst, work, m )
              call stdlib${ii}$_zlacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
              call stdlib${ii}$_zrot( 2_${ik}$, work, 1_${ik}$, work( 3_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_zrot( 2_${ik}$, work( 5_${ik}$ ), 1_${ik}$, work( 7_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_zrot( 2_${ik}$, work, 2_${ik}$, work( 2_${ik}$ ), 2_${ik}$, cq, -sq )
              call stdlib${ii}$_zrot( 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, work( 6_${ik}$ ), 2_${ik}$, cq, -sq )
              do i = 1, 2
                 work( i ) = work( i ) - a( j1+i-1, j1 )
                 work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 )
                 work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 )
                 work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 )
              end do
              scale = real( czero,KIND=dp)
              sum = real( cone,KIND=dp)
              call stdlib${ii}$_zlassq( m*m, work, 1_${ik}$, scale, sum )
              sa = scale*sqrt( sum )
              scale = real( czero,KIND=dp)
              sum = real( cone,KIND=dp)
              call stdlib${ii}$_zlassq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
              sb = scale*sqrt( sum )
              strong = sa<=thresha .and. sb<=threshb
              if( .not.strong )go to 20
           end if
           ! if the swap is accepted ("weakly" and "strongly"), apply the
           ! equivalence transformations to the original matrix pair (a,b)
           call stdlib${ii}$_zrot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
           call stdlib${ii}$_zrot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
           call stdlib${ii}$_zrot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq )
           call stdlib${ii}$_zrot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq )
           ! set  n1 by n2 (2,1) blocks to 0
           a( j1+1, j1 ) = czero
           b( j1+1, j1 ) = czero
           ! accumulate transformations into q and z if requested.
           if( wantz )call stdlib${ii}$_zrot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
                     
           if( wantq )call stdlib${ii}$_zrot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, cq,conjg( sq ) )
                     
           ! exit with info = 0 if swap was successfully performed.
           return
           ! exit with info = 1 if swap was rejected.
           20 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_ztgex2

#:for ck,ct,ci in CMPLX_KINDS_TYPES
#:if not ck in ["sp","dp"]
     pure module subroutine stdlib${ii}$_${ci}$tgex2( wantq, wantz, n, a, lda, b, ldb, q, ldq, z,ldz, j1, info )
     !! ZTGEX2: swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22)
     !! in an upper triangular matrix pair (A, B) by an unitary equivalence
     !! transformation.
     !! (A, B) must be in generalized Schur canonical form, that is, A and
     !! B are both upper triangular.
     !! Optionally, the matrices Q and Z of generalized Schur vectors are
     !! updated.
     !! Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H
     !! Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H
               
        ! -- lapack auxiliary routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           logical(lk), intent(in) :: wantq, wantz
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: j1, lda, ldb, ldq, ldz, n
           ! Array Arguments 
           complex(${ck}$), intent(inout) :: a(lda,*), b(ldb,*), q(ldq,*), z(ldz,*)
        ! =====================================================================
           ! Parameters 
           real(${ck}$), parameter :: twenty = 2.0e+1_${ck}$
           integer(${ik}$), parameter :: ldst = 2_${ik}$
           logical(lk), parameter :: wands = .true.
           
           
           
           
           ! Local Scalars 
           logical(lk) :: strong, weak
           integer(${ik}$) :: i, m
           real(${ck}$) :: cq, cz, eps, sa, sb, scale, smlnum, sum, thresha, threshb
           complex(${ck}$) :: cdum, f, g, sq, sz
           ! Local Arrays 
           complex(${ck}$) :: s(ldst,ldst), t(ldst,ldst), work(8_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           info = 0_${ik}$
           ! quick return if possible
           if( n<=1 )return
           m = ldst
           weak = .false.
           strong = .false.
           ! make a local copy of selected block in (a, b)
           call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, a( j1, j1 ), lda, s, ldst )
           call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, b( j1, j1 ), ldb, t, ldst )
           ! compute the threshold for testing the acceptance of swapping.
           eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'P' )
           smlnum = stdlib${ii}$_${c2ri(ci)}$lamch( 'S' ) / eps
           scale = real( czero,KIND=${ck}$)
           sum = real( cone,KIND=${ck}$)
           call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, s, ldst, work, m )
           call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
           call stdlib${ii}$_${ci}$lassq( m*m, work, 1_${ik}$, scale, sum )
           sa = scale*sqrt( sum )
           scale = real( czero,KIND=${ck}$)
           sum = real( cone,KIND=${ck}$)
           call stdlib${ii}$_${ci}$lassq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
           sb = scale*sqrt( sum )
           ! thres has been changed from
              ! thresh = max( ten*eps*sa, smlnum )
           ! to
              ! thresh = max( twenty*eps*sa, smlnum )
           ! on 04/01/10.
           ! "bug" reported by ondra kamenik, confirmed by julie langou, fixed by
           ! jim demmel and guillaume revy. see forum post 1783.
           thresha = max( twenty*eps*sa, smlnum )
           threshb = max( twenty*eps*sb, smlnum )
           ! compute unitary ql and rq that swap 1-by-1 and 1-by-1 blocks
           ! using givens rotations and perform the swap tentatively.
           f = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 1_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 1_${ik}$ )
           g = s( 2_${ik}$, 2_${ik}$ )*t( 1_${ik}$, 2_${ik}$ ) - t( 2_${ik}$, 2_${ik}$ )*s( 1_${ik}$, 2_${ik}$ )
           sa = abs( s( 2_${ik}$, 2_${ik}$ ) ) * abs( t( 1_${ik}$, 1_${ik}$ ) )
           sb = abs( s( 1_${ik}$, 1_${ik}$ ) ) * abs( t( 2_${ik}$, 2_${ik}$ ) )
           call stdlib${ii}$_${ci}$lartg( g, f, cz, sz, cdum )
           sz = -sz
           call stdlib${ii}$_${ci}$rot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, s( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           call stdlib${ii}$_${ci}$rot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), 1_${ik}$, t( 1_${ik}$, 2_${ik}$ ), 1_${ik}$, cz, conjg( sz ) )
           if( sa>=sb ) then
              call stdlib${ii}$_${ci}$lartg( s( 1_${ik}$, 1_${ik}$ ), s( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           else
              call stdlib${ii}$_${ci}$lartg( t( 1_${ik}$, 1_${ik}$ ), t( 2_${ik}$, 1_${ik}$ ), cq, sq, cdum )
           end if
           call stdlib${ii}$_${ci}$rot( 2_${ik}$, s( 1_${ik}$, 1_${ik}$ ), ldst, s( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           call stdlib${ii}$_${ci}$rot( 2_${ik}$, t( 1_${ik}$, 1_${ik}$ ), ldst, t( 2_${ik}$, 1_${ik}$ ), ldst, cq, sq )
           ! weak stability test: |s21| <= o(eps f-norm((a)))
                                ! and  |t21| <= o(eps f-norm((b)))
           weak = abs( s( 2_${ik}$, 1_${ik}$ ) )<=thresha .and.abs( t( 2_${ik}$, 1_${ik}$ ) )<=threshb
           if( .not.weak )go to 20
           if( wands ) then
              ! strong stability test:
                 ! f-norm((a-ql**h*s*qr)) <= o(eps*f-norm((a)))
                 ! and
                 ! f-norm((b-ql**h*t*qr)) <= o(eps*f-norm((b)))
              call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, s, ldst, work, m )
              call stdlib${ii}$_${ci}$lacpy( 'FULL', m, m, t, ldst, work( m*m+1 ), m )
              call stdlib${ii}$_${ci}$rot( 2_${ik}$, work, 1_${ik}$, work( 3_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_${ci}$rot( 2_${ik}$, work( 5_${ik}$ ), 1_${ik}$, work( 7_${ik}$ ), 1_${ik}$, cz, -conjg( sz ) )
              call stdlib${ii}$_${ci}$rot( 2_${ik}$, work, 2_${ik}$, work( 2_${ik}$ ), 2_${ik}$, cq, -sq )
              call stdlib${ii}$_${ci}$rot( 2_${ik}$, work( 5_${ik}$ ), 2_${ik}$, work( 6_${ik}$ ), 2_${ik}$, cq, -sq )
              do i = 1, 2
                 work( i ) = work( i ) - a( j1+i-1, j1 )
                 work( i+2 ) = work( i+2 ) - a( j1+i-1, j1+1 )
                 work( i+4 ) = work( i+4 ) - b( j1+i-1, j1 )
                 work( i+6 ) = work( i+6 ) - b( j1+i-1, j1+1 )
              end do
              scale = real( czero,KIND=${ck}$)
              sum = real( cone,KIND=${ck}$)
              call stdlib${ii}$_${ci}$lassq( m*m, work, 1_${ik}$, scale, sum )
              sa = scale*sqrt( sum )
              scale = real( czero,KIND=${ck}$)
              sum = real( cone,KIND=${ck}$)
              call stdlib${ii}$_${ci}$lassq( m*m, work(m*m+1), 1_${ik}$, scale, sum )
              sb = scale*sqrt( sum )
              strong = sa<=thresha .and. sb<=threshb
              if( .not.strong )go to 20
           end if
           ! if the swap is accepted ("weakly" and "strongly"), apply the
           ! equivalence transformations to the original matrix pair (a,b)
           call stdlib${ii}$_${ci}$rot( j1+1, a( 1_${ik}$, j1 ), 1_${ik}$, a( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
           call stdlib${ii}$_${ci}$rot( j1+1, b( 1_${ik}$, j1 ), 1_${ik}$, b( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
           call stdlib${ii}$_${ci}$rot( n-j1+1, a( j1, j1 ), lda, a( j1+1, j1 ), lda, cq, sq )
           call stdlib${ii}$_${ci}$rot( n-j1+1, b( j1, j1 ), ldb, b( j1+1, j1 ), ldb, cq, sq )
           ! set  n1 by n2 (2,1) blocks to 0
           a( j1+1, j1 ) = czero
           b( j1+1, j1 ) = czero
           ! accumulate transformations into q and z if requested.
           if( wantz )call stdlib${ii}$_${ci}$rot( n, z( 1_${ik}$, j1 ), 1_${ik}$, z( 1_${ik}$, j1+1 ), 1_${ik}$, cz,conjg( sz ) )
                     
           if( wantq )call stdlib${ii}$_${ci}$rot( n, q( 1_${ik}$, j1 ), 1_${ik}$, q( 1_${ik}$, j1+1 ), 1_${ik}$, cq,conjg( sq ) )
                     
           ! exit with info = 0 if swap was successfully performed.
           return
           ! exit with info = 1 if swap was rejected.
           20 continue
           info = 1_${ik}$
           return
     end subroutine stdlib${ii}$_${ci}$tgex2

#:endif
#:endfor


#:endfor
end submodule stdlib_lapack_eigv_comp2