stdlib_lapack_eigv_svd_drivers.fypp Source File


Source Code

#:include "common.fypp" 
submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_eigv_svd_drivers
  implicit none


  contains
#:for ik,it,ii in LINALG_INT_KINDS_TYPES

     module subroutine stdlib${ii}$_sgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt,work, lwork, info )
     !! SGESVD computes the singular value decomposition (SVD) of a real
     !! M-by-N matrix A, optionally computing the left and/or right singular
     !! vectors. The SVD is written
     !! A = U * SIGMA * transpose(V)
     !! where SIGMA is an M-by-N matrix which is zero except for its
     !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
     !! V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
     !! are the singular values of A; they are real and non-negative, and
     !! are returned in descending order.  The first min(m,n) columns of
     !! U and V are the left and right singular vectors of A.
     !! Note that the routine returns V**T, not V.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobu, jobvt
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n
           ! Array Arguments 
           real(sp), intent(inout) :: a(lda,*)
           real(sp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,&
                      wntvs
           integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, &
           iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, &
                     wrkbl
           integer(${ik}$) :: lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m, lwork_sgebrd, &
                     lwork_sorgbr_p, lwork_sorgbr_q, lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
           real(sp) :: anrm, bignum, eps, smlnum
           ! Local Arrays 
           real(sp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           wntua = stdlib_lsame( jobu, 'A' )
           wntus = stdlib_lsame( jobu, 'S' )
           wntuas = wntua .or. wntus
           wntuo = stdlib_lsame( jobu, 'O' )
           wntun = stdlib_lsame( jobu, 'N' )
           wntva = stdlib_lsame( jobvt, 'A' )
           wntvs = stdlib_lsame( jobvt, 'S' )
           wntvas = wntva .or. wntvs
           wntvo = stdlib_lsame( jobvt, 'O' )
           wntvn = stdlib_lsame( jobvt, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then
              info = -1_${ik}$
           else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) &
                     then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then
              info = -9_${ik}$
           else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) &
                     then
              info = -11_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( m>=n .and. minmn>0_${ik}$ ) then
                 ! compute space needed for stdlib${ii}$_sbdsqr
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ )
                 bdspac = 5_${ik}$*n
                 ! compute space needed for stdlib${ii}$_sgeqrf
                 call stdlib${ii}$_sgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sgeqrf = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sorgqr
                 call stdlib${ii}$_sorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgqr_n = int( dum(1_${ik}$),KIND=${ik}$)
                 call stdlib${ii}$_sorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgqr_m = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sgebrd
                 call stdlib${ii}$_sgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                           
                 lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sorgbr p
                 call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sorgbr q
                 call stdlib${ii}$_sorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                 if( m>=mnthr ) then
                    if( wntun ) then
                       ! path 1 (m much larger than n, jobu='n')
                       maxwrk = n + lwork_sgeqrf
                       maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sgebrd )
                       if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p )
                       maxwrk = max( maxwrk, bdspac )
                       minwrk = max( 4_${ik}$*n, bdspac )
                    else if( wntuo .and. wntvn ) then
                       ! path 2 (m much larger than n, jobu='o', jobvt='n')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( n*n+wrkbl, n*n+m*n+n )
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntuo .and. wntvas ) then
                       ! path 3 (m much larger than n, jobu='o', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( n*n+wrkbl, n*n+m*n+n )
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntus .and. wntvn ) then
                       ! path 4 (m much larger than n, jobu='s', jobvt='n')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntus .and. wntvo ) then
                       ! path 5 (m much larger than n, jobu='s', jobvt='o')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntus .and. wntvas ) then
                       ! path 6 (m much larger than n, jobu='s', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntua .and. wntvn ) then
                       ! path 7 (m much larger than n, jobu='a', jobvt='n')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntua .and. wntvo ) then
                       ! path 8 (m much larger than n, jobu='a', jobvt='o')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    else if( wntua .and. wntvas ) then
                       ! path 9 (m much larger than n, jobu='a', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_sgeqrf
                       wrkbl = max( wrkbl, n+lwork_sorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n+m, bdspac )
                    end if
                 else
                    ! path 10 (m at least n, but not much larger)
                    call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                              
                    lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                    maxwrk = 3_${ik}$*n + lwork_sgebrd
                    if( wntus .or. wntuo ) then
                       call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                       lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                       maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q )
                    end if
                    if( wntua ) then
                       call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                       lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                       maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_q )
                    end if
                    if( .not.wntvn ) then
                      maxwrk = max( maxwrk, 3_${ik}$*n+lwork_sorgbr_p )
                    end if
                    maxwrk = max( maxwrk, bdspac )
                    minwrk = max( 3_${ik}$*n+m, bdspac )
                 end if
              else if( minmn>0_${ik}$ ) then
                 ! compute space needed for stdlib${ii}$_sbdsqr
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'SGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ )
                 bdspac = 5_${ik}$*m
                 ! compute space needed for stdlib${ii}$_sgelqf
                 call stdlib${ii}$_sgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sgelqf = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sorglq
                 call stdlib${ii}$_sorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorglq_n = int( dum(1_${ik}$),KIND=${ik}$)
                 call stdlib${ii}$_sorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorglq_m = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sgebrd
                 call stdlib${ii}$_sgebrd( m, m, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                           
                 lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                  ! compute space needed for stdlib${ii}$_sorgbr p
                 call stdlib${ii}$_sorgbr( 'P', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_sorgbr q
                 call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_sorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                 if( n>=mnthr ) then
                    if( wntvn ) then
                       ! path 1t(n much larger than m, jobvt='n')
                       maxwrk = m + lwork_sgelqf
                       maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sgebrd )
                       if( wntuo .or. wntuas )maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q )
                       maxwrk = max( maxwrk, bdspac )
                       minwrk = max( 4_${ik}$*m, bdspac )
                    else if( wntvo .and. wntun ) then
                       ! path 2t(n much larger than m, jobu='n', jobvt='o')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_m )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( m*m+wrkbl, m*m+m*n+m )
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntvo .and. wntuas ) then
                       ! path 3t(n much larger than m, jobu='s' or 'a',
                       ! jobvt='o')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_m )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( m*m+wrkbl, m*m+m*n+m )
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntvs .and. wntun ) then
                       ! path 4t(n much larger than m, jobu='n', jobvt='s')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_m )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntvs .and. wntuo ) then
                       ! path 5t(n much larger than m, jobu='o', jobvt='s')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_m )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                       maxwrk = max( maxwrk, minwrk )
                    else if( wntvs .and. wntuas ) then
                       ! path 6t(n much larger than m, jobu='s' or 'a',
                       ! jobvt='s')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_m )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntva .and. wntun ) then
                       ! path 7t(n much larger than m, jobu='n', jobvt='a')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_n )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntva .and. wntuo ) then
                       ! path 8t(n much larger than m, jobu='o', jobvt='a')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_n )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    else if( wntva .and. wntuas ) then
                       ! path 9t(n much larger than m, jobu='s' or 'a',
                       ! jobvt='a')
                       wrkbl = m + lwork_sgelqf
                       wrkbl = max( wrkbl, m+lwork_sorglq_n )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_p )
                       wrkbl = max( wrkbl, 3_${ik}$*m+lwork_sorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = m*m + wrkbl
                       minwrk = max( 3_${ik}$*m+n, bdspac )
                    end if
                 else
                    ! path 10t(n greater than m, but not much larger)
                    call stdlib${ii}$_sgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                              
                    lwork_sgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                    maxwrk = 3_${ik}$*m + lwork_sgebrd
                    if( wntvs .or. wntvo ) then
                      ! compute space needed for stdlib${ii}$_sorgbr p
                      call stdlib${ii}$_sorgbr( 'P', m, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                      lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$)
                      maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p )
                    end if
                    if( wntva ) then
                      call stdlib${ii}$_sorgbr( 'P', n, n, m, a, n, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                      lwork_sorgbr_p = int( dum(1_${ik}$),KIND=${ik}$)
                      maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_p )
                    end if
                    if( .not.wntun ) then
                       maxwrk = max( maxwrk, 3_${ik}$*m+lwork_sorgbr_q )
                    end if
                    maxwrk = max( maxwrk, bdspac )
                    minwrk = max( 3_${ik}$*m+n, bdspac )
                 end if
              end if
              maxwrk = max( maxwrk, minwrk )
              work( 1_${ik}$ ) = maxwrk
              if( lwork<minwrk .and. .not.lquery ) then
                 info = -13_${ik}$
              end if
           end if
           if( info/=0_${ik}$ ) then
              call stdlib${ii}$_xerbla( 'SGESVD', -info )
              return
           else if( lquery ) then
              return
           end if
           ! quick return if possible
           if( m==0_${ik}$ .or. n==0_${ik}$ ) then
              return
           end if
           ! get machine constants
           eps = stdlib${ii}$_slamch( 'P' )
           smlnum = sqrt( stdlib${ii}$_slamch( 'S' ) ) / eps
           bignum = one / smlnum
           ! scale a if max element outside range [smlnum,bignum]
           anrm = stdlib${ii}$_slange( 'M', m, n, a, lda, dum )
           iscl = 0_${ik}$
           if( anrm>zero .and. anrm<smlnum ) then
              iscl = 1_${ik}$
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, smlnum, m, n, a, lda, ierr )
           else if( anrm>bignum ) then
              iscl = 1_${ik}$
              call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, anrm, bignum, m, n, a, lda, ierr )
           end if
           if( m>=n ) then
              ! a has at least as many rows as columns. if a has sufficiently
              ! more rows than columns, first reduce using the qr
              ! decomposition (if sufficient workspace available)
              if( m>=mnthr ) then
                 if( wntun ) then
                    ! path 1 (m much larger than n, jobu='n')
                    ! no left singular vectors to be computed
                    itau = 1_${ik}$
                    iwork = itau + n
                    ! compute a=q*r
                    ! (workspace: need 2*n, prefer n+n*nb)
                    call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                              ierr )
                    ! zero out below r
                    if( n > 1_${ik}$ ) then
                       call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, a( 2_${ik}$, 1_${ik}$ ),lda )
                    end if
                    ie = 1_${ik}$
                    itauq = ie + n
                    itaup = itauq + n
                    iwork = itaup + n
                    ! bidiagonalize r in a
                    ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                    call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), &
                              work( iwork ), lwork-iwork+1,ierr )
                    ncvt = 0_${ik}$
                    if( wntvo .or. wntvas ) then
                       ! if right singular vectors desired, generate p'.
                       ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                       call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       ncvt = n
                    end if
                    iwork = ie + n
                    ! perform bidiagonal qr iteration, computing right
                    ! singular vectors of a in a if desired
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'U', n, ncvt, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, &
                              work( iwork ), info )
                    ! if right singular vectors desired in vt, copy them there
                    if( wntvas )call stdlib${ii}$_slacpy( 'F', n, n, a, lda, vt, ldvt )
                 else if( wntuo .and. wntvn ) then
                    ! path 2 (m much larger than n, jobu='o', jobvt='n')
                    ! n left singular vectors to be overwritten on a and
                    ! no right singular vectors to be computed
                    if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then
                       ! sufficient workspace for a fast algorithm
                       ir = 1_${ik}$
                       if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then
                          ! work(iu) is lda by n, work(ir) is lda by n
                          ldwrku = lda
                          ldwrkr = lda
                       else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then
                          ! work(iu) is lda by n, work(ir) is n by n
                          ldwrku = lda
                          ldwrkr = n
                       else
                          ! work(iu) is ldwrku by n, work(ir) is n by n
                          ldwrku = ( lwork-n*n-n ) / n
                          ldwrkr = n
                       end if
                       itau = ir + ldwrkr*n
                       iwork = itau + n
                       ! compute a=q*r
                       ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                       call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy r to work(ir) and zero out below it
                       call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
                       call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),ldwrkr )
                                 
                       ! generate q in a
                       ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                       call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + n
                       itaup = itauq + n
                       iwork = itaup + n
                       ! bidiagonalize r in work(ir)
                       ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                       call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),work( itauq ), &
                                 work( itaup ),work( iwork ), lwork-iwork+1, ierr )
                       ! generate left vectors bidiagonalizing r
                       ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                       call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( &
                                 iwork ),lwork-iwork+1, ierr )
                       iwork = ie + n
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of r in work(ir)
                       ! (workspace: need n*n+bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,work( ir ), &
                                 ldwrkr, dum, 1_${ik}$,work( iwork ), info )
                       iu = ie + n
                       ! multiply q in a by left singular vectors of r in
                       ! work(ir), storing result in work(iu) and copying to a
                       ! (workspace: need n*n+2*n, prefer n*n+m*n+n)
                       do i = 1, m, ldwrku
                          chunk = min( m-i+1, ldwrku )
                          call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )&
                                    , ldwrkr, zero,work( iu ), ldwrku )
                          call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda )
                                    
                       end do
                    else
                       ! insufficient workspace for a fast algorithm
                       ie = 1_${ik}$
                       itauq = ie + n
                       itaup = itauq + n
                       iwork = itaup + n
                       ! bidiagonalize a
                       ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb)
                       call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup &
                                 ),work( iwork ), lwork-iwork+1, ierr )
                       ! generate left vectors bidiagonalizing a
                       ! (workspace: need 4*n, prefer 3*n+n*nb)
                       call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + n
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of a in a
                       ! (workspace: need bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum, 1_${ik}$,a, lda, dum, 1_${ik}$, &
                                 work( iwork ), info )
                    end if
                 else if( wntuo .and. wntvas ) then
                    ! path 3 (m much larger than n, jobu='o', jobvt='s' or 'a')
                    ! n left singular vectors to be overwritten on a and
                    ! n right singular vectors to be computed in vt
                    if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then
                       ! sufficient workspace for a fast algorithm
                       ir = 1_${ik}$
                       if( lwork>=max( wrkbl, lda*n+n )+lda*n ) then
                          ! work(iu) is lda by n and work(ir) is lda by n
                          ldwrku = lda
                          ldwrkr = lda
                       else if( lwork>=max( wrkbl, lda*n+n )+n*n ) then
                          ! work(iu) is lda by n and work(ir) is n by n
                          ldwrku = lda
                          ldwrkr = n
                       else
                          ! work(iu) is ldwrku by n and work(ir) is n by n
                          ldwrku = ( lwork-n*n-n ) / n
                          ldwrkr = n
                       end if
                       itau = ir + ldwrkr*n
                       iwork = itau + n
                       ! compute a=q*r
                       ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                       call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy r to vt, zeroing out below it
                       call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt )
                       if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt )
                                 
                       ! generate q in a
                       ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                       call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + n
                       itaup = itauq + n
                       iwork = itaup + n
                       ! bidiagonalize r in vt, copying result to work(ir)
                       ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                       call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( &
                                 itaup ),work( iwork ), lwork-iwork+1, ierr )
                       call stdlib${ii}$_slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
                       ! generate left vectors bidiagonalizing r in work(ir)
                       ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                       call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), work( &
                                 iwork ),lwork-iwork+1, ierr )
                       ! generate right vectors bidiagonalizing r in vt
                       ! (workspace: need n*n+4*n-1, prefer n*n+3*n+(n-1)*nb)
                       call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + n
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of r in work(ir) and computing right
                       ! singular vectors of r in vt
                       ! (workspace: need n*n+bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt, ldvt,work( ir ), &
                                 ldwrkr, dum, 1_${ik}$,work( iwork ), info )
                       iu = ie + n
                       ! multiply q in a by left singular vectors of r in
                       ! work(ir), storing result in work(iu) and copying to a
                       ! (workspace: need n*n+2*n, prefer n*n+m*n+n)
                       do i = 1, m, ldwrku
                          chunk = min( m-i+1, ldwrku )
                          call stdlib${ii}$_sgemm( 'N', 'N', chunk, n, n, one, a( i, 1_${ik}$ ),lda, work( ir )&
                                    , ldwrkr, zero,work( iu ), ldwrku )
                          call stdlib${ii}$_slacpy( 'F', chunk, n, work( iu ), ldwrku,a( i, 1_${ik}$ ), lda )
                                    
                       end do
                    else
                       ! insufficient workspace for a fast algorithm
                       itau = 1_${ik}$
                       iwork = itau + n
                       ! compute a=q*r
                       ! (workspace: need 2*n, prefer n+n*nb)
                       call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy r to vt, zeroing out below it
                       call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt )
                       if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt )
                                 
                       ! generate q in a
                       ! (workspace: need 2*n, prefer n+n*nb)
                       call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + n
                       itaup = itauq + n
                       iwork = itaup + n
                       ! bidiagonalize r in vt
                       ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                       call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( &
                                 itaup ),work( iwork ), lwork-iwork+1, ierr )
                       ! multiply q in a by left vectors bidiagonalizing r
                       ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                       call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), a, lda,&
                                  work( iwork ),lwork-iwork+1, ierr )
                       ! generate right vectors bidiagonalizing r in vt
                       ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                       call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + n
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of a in a and computing right
                       ! singular vectors of a in vt
                       ! (workspace: need bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt, ldvt,a, lda, dum, &
                                 1_${ik}$, work( iwork ), info )
                    end if
                 else if( wntus ) then
                    if( wntvn ) then
                       ! path 4 (m much larger than n, jobu='s', jobvt='n')
                       ! n left singular vectors to be computed in u and
                       ! no right singular vectors to be computed
                       if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          ir = 1_${ik}$
                          if( lwork>=wrkbl+lda*n ) then
                             ! work(ir) is lda by n
                             ldwrkr = lda
                          else
                             ! work(ir) is n by n
                             ldwrkr = n
                          end if
                          itau = ir + ldwrkr*n
                          iwork = itau + n
                          ! compute a=q*r
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to work(ir), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr )
                                    
                          ! generate q in a
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(ir)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          ! generate left vectors bidiagonalizing r in work(ir)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(ir)
                          ! (workspace: need n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), &
                                    ldwrkr, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply q in a by left singular vectors of r in
                          ! work(ir), storing result in u
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( ir ), ldwrkr, &
                                    zero, u, ldu )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! zero out below r in a
                          if( n > 1_${ik}$ ) then
                             call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda )
                                       
                          end if
                          ! bidiagonalize r in a
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left vectors bidiagonalizing r
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, &
                                    1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntvo ) then
                       ! path 5 (m much larger than n, jobu='s', jobvt='o')
                       ! n left singular vectors to be computed in u and
                       ! n right singular vectors to be overwritten on a
                       if( lwork>=2_${ik}$*n*n+max( 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+2*lda*n ) then
                             ! work(iu) is lda by n and work(ir) is lda by n
                             ldwrku = lda
                             ir = iu + ldwrku*n
                             ldwrkr = lda
                          else if( lwork>=wrkbl+( lda+n )*n ) then
                             ! work(iu) is lda by n and work(ir) is n by n
                             ldwrku = lda
                             ir = iu + ldwrku*n
                             ldwrkr = n
                          else
                             ! work(iu) is n by n and work(ir) is n by n
                             ldwrku = n
                             ir = iu + ldwrku*n
                             ldwrkr = n
                          end if
                          itau = ir + ldwrkr*n
                          iwork = itau + n
                          ! compute a=q*r
                          ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to work(iu), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku )
                                    
                          ! generate q in a
                          ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(iu), copying result to
                          ! work(ir)
                          ! (workspace: need 2*n*n+4*n,
                                      ! prefer 2*n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr )
                                    
                          ! generate left bidiagonalizing vectors in work(iu)
                          ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in work(ir)
                          ! (workspace: need 2*n*n+4*n-1,
                                      ! prefer 2*n*n+3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(iu) and computing
                          ! right singular vectors of r in work(ir)
                          ! (workspace: need 2*n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, &
                                    work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info )
                          ! multiply q in a by left singular vectors of r in
                          ! work(iu), storing result in u
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, &
                                    zero, u, ldu )
                          ! copy right singular vectors of r to a
                          ! (workspace: need n*n)
                          call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! zero out below r in a
                          if( n > 1_${ik}$ ) then
                             call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda )
                                       
                          end if
                          ! bidiagonalize r in a
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left vectors bidiagonalizing r
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          ! generate right vectors bidiagonalizing r in a
                          ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in a
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, &
                                    1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntvas ) then
                       ! path 6 (m much larger than n, jobu='s', jobvt='s'
                               ! or 'a')
                       ! n left singular vectors to be computed in u and
                       ! n right singular vectors to be computed in vt
                       if( lwork>=n*n+max( 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+lda*n ) then
                             ! work(iu) is lda by n
                             ldwrku = lda
                          else
                             ! work(iu) is n by n
                             ldwrku = n
                          end if
                          itau = iu + ldwrku*n
                          iwork = itau + n
                          ! compute a=q*r
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to work(iu), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku )
                                    
                          ! generate q in a
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(iu), copying result to vt
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt )
                          ! generate left bidiagonalizing vectors in work(iu)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in vt
                          ! (workspace: need n*n+4*n-1,
                                      ! prefer n*n+3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),&
                                     lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(iu) and computing
                          ! right singular vectors of r in vt
                          ! (workspace: need n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),&
                                     ldwrku, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply q in a by left singular vectors of r in
                          ! work(iu), storing result in u
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, a, lda,work( iu ), ldwrku, &
                                    zero, u, ldu )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sorgqr( m, n, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to vt, zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt )
                          if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt &
                                    )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in vt
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left bidiagonalizing vectors
                          ! in vt
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in vt
                          ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),&
                                     lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    end if
                 else if( wntua ) then
                    if( wntvn ) then
                       ! path 7 (m much larger than n, jobu='a', jobvt='n')
                       ! m left singular vectors to be computed in u and
                       ! no right singular vectors to be computed
                       if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          ir = 1_${ik}$
                          if( lwork>=wrkbl+lda*n ) then
                             ! work(ir) is lda by n
                             ldwrkr = lda
                          else
                             ! work(ir) is n by n
                             ldwrkr = n
                          end if
                          itau = ir + ldwrkr*n
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! copy r to work(ir), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( ir ),ldwrkr )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( ir+1 ), ldwrkr )
                                    
                          ! generate q in u
                          ! (workspace: need n*n+n+m, prefer n*n+n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(ir)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( ir ), ldwrkr, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in work(ir)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(ir)
                          ! (workspace: need n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, n, 0_${ik}$, s, work( ie ), dum,1_${ik}$, work( ir ), &
                                    ldwrkr, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply q in u by left singular vectors of r in
                          ! work(ir), storing result in a
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( ir ), ldwrkr, &
                                    zero, a, lda )
                          ! copy left singular vectors of a from a to u
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need n+m, prefer n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! zero out below r in a
                          if( n > 1_${ik}$ ) then
                             call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda )
                                       
                          end if
                          ! bidiagonalize r in a
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left bidiagonalizing vectors
                          ! in a
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, 0_${ik}$, m, 0_${ik}$, s, work( ie ), dum,1_${ik}$, u, ldu, dum, &
                                    1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntvo ) then
                       ! path 8 (m much larger than n, jobu='a', jobvt='o')
                       ! m left singular vectors to be computed in u and
                       ! n right singular vectors to be overwritten on a
                       if( lwork>=2_${ik}$*n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+2*lda*n ) then
                             ! work(iu) is lda by n and work(ir) is lda by n
                             ldwrku = lda
                             ir = iu + ldwrku*n
                             ldwrkr = lda
                          else if( lwork>=wrkbl+( lda+n )*n ) then
                             ! work(iu) is lda by n and work(ir) is n by n
                             ldwrku = lda
                             ir = iu + ldwrku*n
                             ldwrkr = n
                          else
                             ! work(iu) is n by n and work(ir) is n by n
                             ldwrku = n
                             ir = iu + ldwrku*n
                             ldwrkr = n
                          end if
                          itau = ir + ldwrkr*n
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n*n+2*n, prefer 2*n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need 2*n*n+n+m, prefer 2*n*n+n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to work(iu), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku )
                                    
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(iu), copying result to
                          ! work(ir)
                          ! (workspace: need 2*n*n+4*n,
                                      ! prefer 2*n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku,work( ir ), ldwrkr )
                                    
                          ! generate left bidiagonalizing vectors in work(iu)
                          ! (workspace: need 2*n*n+4*n, prefer 2*n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in work(ir)
                          ! (workspace: need 2*n*n+4*n-1,
                                      ! prefer 2*n*n+3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, work( ir ), ldwrkr,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(iu) and computing
                          ! right singular vectors of r in work(ir)
                          ! (workspace: need 2*n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, &
                                    work( iu ),ldwrku, dum, 1_${ik}$, work( iwork ), info )
                          ! multiply q in u by left singular vectors of r in
                          ! work(iu), storing result in a
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, &
                                    zero, a, lda )
                          ! copy left singular vectors of a from a to u
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu )
                          ! copy right singular vectors of r from work(ir) to a
                          call stdlib${ii}$_slacpy( 'F', n, n, work( ir ), ldwrkr, a,lda )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need n+m, prefer n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! zero out below r in a
                          if( n > 1_${ik}$ ) then
                             call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,a( 2_${ik}$, 1_${ik}$ ), lda )
                                       
                          end if
                          ! bidiagonalize r in a
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left bidiagonalizing vectors
                          ! in a
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, a, lda,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in a
                          ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in a
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), a,lda, u, ldu, dum, &
                                    1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntvas ) then
                       ! path 9 (m much larger than n, jobu='a', jobvt='s'
                               ! or 'a')
                       ! m left singular vectors to be computed in u and
                       ! n right singular vectors to be computed in vt
                       if( lwork>=n*n+max( n+m, 4_${ik}$*n, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+lda*n ) then
                             ! work(iu) is lda by n
                             ldwrku = lda
                          else
                             ! work(iu) is n by n
                             ldwrku = n
                          end if
                          itau = iu + ldwrku*n
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need n*n+2*n, prefer n*n+n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need n*n+n+m, prefer n*n+n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r to work(iu), zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,work( iu+1 ), ldwrku )
                                    
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in work(iu), copying result to vt
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', n, n, work( iu ), ldwrku, vt,ldvt )
                          ! generate left bidiagonalizing vectors in work(iu)
                          ! (workspace: need n*n+4*n, prefer n*n+3*n+n*nb)
                          call stdlib${ii}$_sorgbr( 'Q', n, n, n, work( iu ), ldwrku,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in vt
                          ! (workspace: need n*n+4*n-1,
                                      ! prefer n*n+3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),&
                                     lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of r in work(iu) and computing
                          ! right singular vectors of r in vt
                          ! (workspace: need n*n+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, n, 0_${ik}$, s, work( ie ), vt,ldvt, work( iu ),&
                                     ldwrku, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply q in u by left singular vectors of r in
                          ! work(iu), storing result in a
                          ! (workspace: need n*n)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, n, one, u, ldu,work( iu ), ldwrku, &
                                    zero, a, lda )
                          ! copy left singular vectors of a from a to u
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, u, ldu )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + n
                          ! compute a=q*r, copying result to u
                          ! (workspace: need 2*n, prefer n+n*nb)
                          call stdlib${ii}$_sgeqrf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                          ! generate q in u
                          ! (workspace: need n+m, prefer n+m*nb)
                          call stdlib${ii}$_sorgqr( m, m, n, u, ldu, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy r from a to vt, zeroing out below it
                          call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt )
                          if( n>1_${ik}$ )call stdlib${ii}$_slaset( 'L', n-1, n-1, zero, zero,vt( 2_${ik}$, 1_${ik}$ ), ldvt &
                                    )
                          ie = itau
                          itauq = ie + n
                          itaup = itauq + n
                          iwork = itaup + n
                          ! bidiagonalize r in vt
                          ! (workspace: need 4*n, prefer 3*n+2*n*nb)
                          call stdlib${ii}$_sgebrd( n, n, vt, ldvt, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply q in u by left bidiagonalizing vectors
                          ! in vt
                          ! (workspace: need 3*n+m, prefer 3*n+m*nb)
                          call stdlib${ii}$_sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,work( itauq ), u, &
                                    ldu, work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in vt
                          ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ),&
                                     lwork-iwork+1, ierr )
                          iwork = ie + n
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', n, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    end if
                 end if
              else
                 ! m < mnthr
                 ! path 10 (m at least n, but not much larger)
                 ! reduce to bidiagonal form without qr decomposition
                 ie = 1_${ik}$
                 itauq = ie + n
                 itaup = itauq + n
                 iwork = itaup + n
                 ! bidiagonalize a
                 ! (workspace: need 3*n+m, prefer 3*n+(m+n)*nb)
                 call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), &
                           work( iwork ), lwork-iwork+1,ierr )
                 if( wntuas ) then
                    ! if left singular vectors desired in u, copy result to u
                    ! and generate left bidiagonalizing vectors in u
                    ! (workspace: need 3*n+ncu, prefer 3*n+ncu*nb)
                    call stdlib${ii}$_slacpy( 'L', m, n, a, lda, u, ldu )
                    if( wntus )ncu = n
                    if( wntua )ncu = m
                    call stdlib${ii}$_sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),work( iwork ), &
                              lwork-iwork+1, ierr )
                 end if
                 if( wntvas ) then
                    ! if right singular vectors desired in vt, copy result to
                    ! vt and generate right bidiagonalizing vectors in vt
                    ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                    call stdlib${ii}$_slacpy( 'U', n, n, a, lda, vt, ldvt )
                    call stdlib${ii}$_sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),work( iwork ), &
                              lwork-iwork+1, ierr )
                 end if
                 if( wntuo ) then
                    ! if left singular vectors desired in a, generate left
                    ! bidiagonalizing vectors in a
                    ! (workspace: need 4*n, prefer 3*n+n*nb)
                    call stdlib${ii}$_sorgbr( 'Q', m, n, n, a, lda, work( itauq ),work( iwork ), lwork-&
                              iwork+1, ierr )
                 end if
                 if( wntvo ) then
                    ! if right singular vectors desired in a, generate right
                    ! bidiagonalizing vectors in a
                    ! (workspace: need 4*n-1, prefer 3*n+(n-1)*nb)
                    call stdlib${ii}$_sorgbr( 'P', n, n, n, a, lda, work( itaup ),work( iwork ), lwork-&
                              iwork+1, ierr )
                 end if
                 iwork = ie + n
                 if( wntuas .or. wntuo )nru = m
                 if( wntun )nru = 0_${ik}$
                 if( wntvas .or. wntvo )ncvt = n
                 if( wntvn )ncvt = 0_${ik}$
                 if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in u and computing right singular
                    ! vectors in vt
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,&
                               1_${ik}$, work( iwork ), info )
                 else if( ( .not.wntuo ) .and. wntvo ) then
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in u and computing right singular
                    ! vectors in a
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, &
                              1_${ik}$, work( iwork ), info )
                 else
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in a and computing right singular
                    ! vectors in vt
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'U', n, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,&
                               1_${ik}$, work( iwork ), info )
                 end if
              end if
           else
              ! a has more columns than rows. if a has sufficiently more
              ! columns than rows, first reduce using the lq decomposition (if
              ! sufficient workspace available)
              if( n>=mnthr ) then
                 if( wntvn ) then
                    ! path 1t(n much larger than m, jobvt='n')
                    ! no right singular vectors to be computed
                    itau = 1_${ik}$
                    iwork = itau + m
                    ! compute a=l*q
                    ! (workspace: need 2*m, prefer m+m*nb)
                    call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ), work( iwork ),lwork-iwork+1, &
                              ierr )
                    ! zero out above l
                    if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ), lda )
                    ie = 1_${ik}$
                    itauq = ie + m
                    itaup = itauq + m
                    iwork = itaup + m
                    ! bidiagonalize l in a
                    ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                    call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),work( itaup ), &
                              work( iwork ), lwork-iwork+1,ierr )
                    if( wntuo .or. wntuas ) then
                       ! if left singular vectors desired, generate q
                       ! (workspace: need 4*m, prefer 3*m+m*nb)
                       call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                    end if
                    iwork = ie + m
                    nru = 0_${ik}$
                    if( wntuo .or. wntuas )nru = m
                    ! perform bidiagonal qr iteration, computing left singular
                    ! vectors of a in a if desired
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'U', m, 0_${ik}$, nru, 0_${ik}$, s, work( ie ), dum, 1_${ik}$, a,lda, dum, 1_${ik}$, &
                              work( iwork ), info )
                    ! if left singular vectors desired in u, copy them there
                    if( wntuas )call stdlib${ii}$_slacpy( 'F', m, m, a, lda, u, ldu )
                 else if( wntvo .and. wntun ) then
                    ! path 2t(n much larger than m, jobu='n', jobvt='o')
                    ! m right singular vectors to be overwritten on a and
                    ! no left singular vectors to be computed
                    if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then
                       ! sufficient workspace for a fast algorithm
                       ir = 1_${ik}$
                       if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then
                          ! work(iu) is lda by n and work(ir) is lda by m
                          ldwrku = lda
                          chunk = n
                          ldwrkr = lda
                       else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then
                          ! work(iu) is lda by n and work(ir) is m by m
                          ldwrku = lda
                          chunk = n
                          ldwrkr = m
                       else
                          ! work(iu) is m by chunk and work(ir) is m by m
                          ldwrku = m
                          chunk = ( lwork-m*m-m ) / m
                          ldwrkr = m
                       end if
                       itau = ir + ldwrkr*m
                       iwork = itau + m
                       ! compute a=l*q
                       ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                       call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy l to work(ir) and zero out above it
                       call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
                       call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr )
                                 
                       ! generate q in a
                       ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                       call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + m
                       itaup = itauq + m
                       iwork = itaup + m
                       ! bidiagonalize l in work(ir)
                       ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                       call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),work( itauq ), &
                                 work( itaup ),work( iwork ), lwork-iwork+1, ierr )
                       ! generate right vectors bidiagonalizing l
                       ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
                       call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( &
                                 iwork ),lwork-iwork+1, ierr )
                       iwork = ie + m
                       ! perform bidiagonal qr iteration, computing right
                       ! singular vectors of l in work(ir)
                       ! (workspace: need m*m+bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, dum,&
                                  1_${ik}$, dum, 1_${ik}$,work( iwork ), info )
                       iu = ie + m
                       ! multiply right singular vectors of l in work(ir) by q
                       ! in a, storing result in work(iu) and copying to a
                       ! (workspace: need m*m+2*m, prefer m*m+m*n+m)
                       do i = 1, n, chunk
                          blk = min( n-i+1, chunk )
                          call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i &
                                    ), lda, zero,work( iu ), ldwrku )
                          call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda )
                                    
                       end do
                    else
                       ! insufficient workspace for a fast algorithm
                       ie = 1_${ik}$
                       itauq = ie + m
                       itaup = itauq + m
                       iwork = itaup + m
                       ! bidiagonalize a
                       ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
                       call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ),work( itauq ), work( itaup &
                                 ),work( iwork ), lwork-iwork+1, ierr )
                       ! generate right vectors bidiagonalizing a
                       ! (workspace: need 4*m, prefer 3*m+m*nb)
                       call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + m
                       ! perform bidiagonal qr iteration, computing right
                       ! singular vectors of a in a
                       ! (workspace: need bdspac)
                       call stdlib${ii}$_sbdsqr( 'L', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), a, lda,dum, 1_${ik}$, dum, 1_${ik}$, &
                                 work( iwork ), info )
                    end if
                 else if( wntvo .and. wntuas ) then
                    ! path 3t(n much larger than m, jobu='s' or 'a', jobvt='o')
                    ! m right singular vectors to be overwritten on a and
                    ! m left singular vectors to be computed in u
                    if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then
                       ! sufficient workspace for a fast algorithm
                       ir = 1_${ik}$
                       if( lwork>=max( wrkbl, lda*n+m )+lda*m ) then
                          ! work(iu) is lda by n and work(ir) is lda by m
                          ldwrku = lda
                          chunk = n
                          ldwrkr = lda
                       else if( lwork>=max( wrkbl, lda*n+m )+m*m ) then
                          ! work(iu) is lda by n and work(ir) is m by m
                          ldwrku = lda
                          chunk = n
                          ldwrkr = m
                       else
                          ! work(iu) is m by chunk and work(ir) is m by m
                          ldwrku = m
                          chunk = ( lwork-m*m-m ) / m
                          ldwrkr = m
                       end if
                       itau = ir + ldwrkr*m
                       iwork = itau + m
                       ! compute a=l*q
                       ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                       call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy l to u, zeroing about above it
                       call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu )
                       if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu )
                       ! generate q in a
                       ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                       call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + m
                       itaup = itauq + m
                       iwork = itaup + m
                       ! bidiagonalize l in u, copying result to work(ir)
                       ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                       call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup &
                                 ),work( iwork ), lwork-iwork+1, ierr )
                       call stdlib${ii}$_slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
                       ! generate right vectors bidiagonalizing l in work(ir)
                       ! (workspace: need m*m+4*m-1, prefer m*m+3*m+(m-1)*nb)
                       call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), work( &
                                 iwork ),lwork-iwork+1, ierr )
                       ! generate left vectors bidiagonalizing l in u
                       ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb)
                       call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + m
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of l in u, and computing right
                       ! singular vectors of l in work(ir)
                       ! (workspace: need m*m+bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, u, &
                                 ldu, dum, 1_${ik}$,work( iwork ), info )
                       iu = ie + m
                       ! multiply right singular vectors of l in work(ir) by q
                       ! in a, storing result in work(iu) and copying to a
                       ! (workspace: need m*m+2*m, prefer m*m+m*n+m))
                       do i = 1, n, chunk
                          blk = min( n-i+1, chunk )
                          call stdlib${ii}$_sgemm( 'N', 'N', m, blk, m, one, work( ir ),ldwrkr, a( 1_${ik}$, i &
                                    ), lda, zero,work( iu ), ldwrku )
                          call stdlib${ii}$_slacpy( 'F', m, blk, work( iu ), ldwrku,a( 1_${ik}$, i ), lda )
                                    
                       end do
                    else
                       ! insufficient workspace for a fast algorithm
                       itau = 1_${ik}$
                       iwork = itau + m
                       ! compute a=l*q
                       ! (workspace: need 2*m, prefer m+m*nb)
                       call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-iwork+&
                                 1_${ik}$, ierr )
                       ! copy l to u, zeroing out above it
                       call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu )
                       if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu )
                       ! generate q in a
                       ! (workspace: need 2*m, prefer m+m*nb)
                       call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                 iwork+1, ierr )
                       ie = itau
                       itauq = ie + m
                       itaup = itauq + m
                       iwork = itaup + m
                       ! bidiagonalize l in u
                       ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                       call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( itaup &
                                 ),work( iwork ), lwork-iwork+1, ierr )
                       ! multiply right vectors bidiagonalizing l by q in a
                       ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                       call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), a, lda, &
                                 work( iwork ),lwork-iwork+1, ierr )
                       ! generate left vectors bidiagonalizing l in u
                       ! (workspace: need 4*m, prefer 3*m+m*nb)
                       call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                 lwork-iwork+1, ierr )
                       iwork = ie + m
                       ! perform bidiagonal qr iteration, computing left
                       ! singular vectors of a in u and computing right
                       ! singular vectors of a in a
                       ! (workspace: need bdspac)
                       call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, 1_${ik}$, &
                                 work( iwork ), info )
                    end if
                 else if( wntvs ) then
                    if( wntun ) then
                       ! path 4t(n much larger than m, jobu='n', jobvt='s')
                       ! m right singular vectors to be computed in vt and
                       ! no left singular vectors to be computed
                       if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          ir = 1_${ik}$
                          if( lwork>=wrkbl+lda*m ) then
                             ! work(ir) is lda by m
                             ldwrkr = lda
                          else
                             ! work(ir) is m by m
                             ldwrkr = m
                          end if
                          itau = ir + ldwrkr*m
                          iwork = itau + m
                          ! compute a=l*q
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy l to work(ir), zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr &
                                    )
                          ! generate q in a
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(ir)
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          ! generate right vectors bidiagonalizing l in
                          ! work(ir)
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing right
                          ! singular vectors of l in work(ir)
                          ! (workspace: need m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, &
                                    dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply right singular vectors of l in work(ir) by
                          ! q in a, storing result in vt
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, a, lda, &
                                    zero, vt, ldvt )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy result to vt
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! zero out above l in a
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda )
                          ! bidiagonalize l in a
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right vectors bidiagonalizing l by q in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntuo ) then
                       ! path 5t(n much larger than m, jobu='o', jobvt='s')
                       ! m right singular vectors to be computed in vt and
                       ! m left singular vectors to be overwritten on a
                       if( lwork>=2_${ik}$*m*m+max( 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+2*lda*m ) then
                             ! work(iu) is lda by m and work(ir) is lda by m
                             ldwrku = lda
                             ir = iu + ldwrku*m
                             ldwrkr = lda
                          else if( lwork>=wrkbl+( lda+m )*m ) then
                             ! work(iu) is lda by m and work(ir) is m by m
                             ldwrku = lda
                             ir = iu + ldwrku*m
                             ldwrkr = m
                          else
                             ! work(iu) is m by m and work(ir) is m by m
                             ldwrku = m
                             ir = iu + ldwrku*m
                             ldwrkr = m
                          end if
                          itau = ir + ldwrkr*m
                          iwork = itau + m
                          ! compute a=l*q
                          ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy l to work(iu), zeroing out below it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku &
                                    )
                          ! generate q in a
                          ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(iu), copying result to
                          ! work(ir)
                          ! (workspace: need 2*m*m+4*m,
                                      ! prefer 2*m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr )
                                    
                          ! generate right bidiagonalizing vectors in work(iu)
                          ! (workspace: need 2*m*m+4*m-1,
                                      ! prefer 2*m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in work(ir)
                          ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of l in work(ir) and computing
                          ! right singular vectors of l in work(iu)
                          ! (workspace: need 2*m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, &
                                    work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info )
                          ! multiply right singular vectors of l in work(iu) by
                          ! q in a, storing result in vt
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, &
                                    zero, vt, ldvt )
                          ! copy left singular vectors of l to a
                          ! (workspace: need m*m)
                          call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! zero out above l in a
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda )
                          ! bidiagonalize l in a
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right vectors bidiagonalizing l by q in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors of l in a
                          ! (workspace: need 4*m, prefer 3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, compute left
                          ! singular vectors of a in a and compute right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntuas ) then
                       ! path 6t(n much larger than m, jobu='s' or 'a',
                               ! jobvt='s')
                       ! m right singular vectors to be computed in vt and
                       ! m left singular vectors to be computed in u
                       if( lwork>=m*m+max( 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+lda*m ) then
                             ! work(iu) is lda by n
                             ldwrku = lda
                          else
                             ! work(iu) is lda by m
                             ldwrku = m
                          end if
                          itau = iu + ldwrku*m
                          iwork = itau + m
                          ! compute a=l*q
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ! copy l to work(iu), zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku &
                                    )
                          ! generate q in a
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(iu), copying result to u
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu )
                          ! generate right bidiagonalizing vectors in work(iu)
                          ! (workspace: need m*m+4*m-1,
                                      ! prefer m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in u
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of l in u and computing right
                          ! singular vectors of l in work(iu)
                          ! (workspace: need m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, &
                                    u, ldu, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply right singular vectors of l in work(iu) by
                          ! q in a, storing result in vt
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, a, lda, &
                                    zero, vt, ldvt )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sorglq( m, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ! copy l to u, zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu )
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in u
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right bidiagonalizing vectors in u by q
                          ! in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in u
                          ! (workspace: need 4*m, prefer 3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    end if
                 else if( wntva ) then
                    if( wntun ) then
                       ! path 7t(n much larger than m, jobu='n', jobvt='a')
                       ! n right singular vectors to be computed in vt and
                       ! no left singular vectors to be computed
                       if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          ir = 1_${ik}$
                          if( lwork>=wrkbl+lda*m ) then
                             ! work(ir) is lda by m
                             ldwrkr = lda
                          else
                             ! work(ir) is m by m
                             ldwrkr = m
                          end if
                          itau = ir + ldwrkr*m
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! copy l to work(ir), zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( ir ),ldwrkr )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( ir+ldwrkr ), ldwrkr &
                                    )
                          ! generate q in vt
                          ! (workspace: need m*m+m+n, prefer m*m+m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(ir)
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( ir ), ldwrkr, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          ! generate right bidiagonalizing vectors in work(ir)
                          ! (workspace: need m*m+4*m-1,
                                      ! prefer m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( ir ), ldwrkr,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing right
                          ! singular vectors of l in work(ir)
                          ! (workspace: need m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, 0_${ik}$, 0_${ik}$, s, work( ie ),work( ir ), ldwrkr, &
                                    dum, 1_${ik}$, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply right singular vectors of l in work(ir) by
                          ! q in vt, storing result in a
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( ir ),ldwrkr, vt, ldvt, &
                                    zero, a, lda )
                          ! copy right singular vectors of a from a to vt
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need m+n, prefer m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! zero out above l in a
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda )
                          ! bidiagonalize l in a
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right bidiagonalizing vectors in a by q
                          ! in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, 0_${ik}$, 0_${ik}$, s, work( ie ), vt,ldvt, dum, 1_${ik}$, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntuo ) then
                       ! path 8t(n much larger than m, jobu='o', jobvt='a')
                       ! n right singular vectors to be computed in vt and
                       ! m left singular vectors to be overwritten on a
                       if( lwork>=2_${ik}$*m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+2*lda*m ) then
                             ! work(iu) is lda by m and work(ir) is lda by m
                             ldwrku = lda
                             ir = iu + ldwrku*m
                             ldwrkr = lda
                          else if( lwork>=wrkbl+( lda+m )*m ) then
                             ! work(iu) is lda by m and work(ir) is m by m
                             ldwrku = lda
                             ir = iu + ldwrku*m
                             ldwrkr = m
                          else
                             ! work(iu) is m by m and work(ir) is m by m
                             ldwrku = m
                             ir = iu + ldwrku*m
                             ldwrkr = m
                          end if
                          itau = ir + ldwrkr*m
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m*m+2*m, prefer 2*m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need 2*m*m+m+n, prefer 2*m*m+m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ! copy l to work(iu), zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku &
                                    )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(iu), copying result to
                          ! work(ir)
                          ! (workspace: need 2*m*m+4*m,
                                      ! prefer 2*m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku,work( ir ), ldwrkr )
                                    
                          ! generate right bidiagonalizing vectors in work(iu)
                          ! (workspace: need 2*m*m+4*m-1,
                                      ! prefer 2*m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in work(ir)
                          ! (workspace: need 2*m*m+4*m, prefer 2*m*m+3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,work( itauq ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of l in work(ir) and computing
                          ! right singular vectors of l in work(iu)
                          ! (workspace: need 2*m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, &
                                    work( ir ),ldwrkr, dum, 1_${ik}$, work( iwork ), info )
                          ! multiply right singular vectors of l in work(iu) by
                          ! q in vt, storing result in a
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, &
                                    zero, a, lda )
                          ! copy right singular vectors of a from a to vt
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt )
                          ! copy left singular vectors of a from work(ir) to a
                          call stdlib${ii}$_slacpy( 'F', m, m, work( ir ), ldwrkr, a,lda )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need m+n, prefer m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! zero out above l in a
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, a( 1_${ik}$, 2_${ik}$ ),lda )
                          ! bidiagonalize l in a
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, a, lda, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right bidiagonalizing vectors in a by q
                          ! in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, a, lda,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in a
                          ! (workspace: need 4*m, prefer 3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, a, lda, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in a and computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    else if( wntuas ) then
                       ! path 9t(n much larger than m, jobu='s' or 'a',
                               ! jobvt='a')
                       ! n right singular vectors to be computed in vt and
                       ! m left singular vectors to be computed in u
                       if( lwork>=m*m+max( n+m, 4_${ik}$*m, bdspac ) ) then
                          ! sufficient workspace for a fast algorithm
                          iu = 1_${ik}$
                          if( lwork>=wrkbl+lda*m ) then
                             ! work(iu) is lda by m
                             ldwrku = lda
                          else
                             ! work(iu) is m by m
                             ldwrku = m
                          end if
                          itau = iu + ldwrku*m
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need m*m+2*m, prefer m*m+m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need m*m+m+n, prefer m*m+m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ! copy l to work(iu), zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, work( iu ),ldwrku )
                          call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero,work( iu+ldwrku ), ldwrku &
                                    )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in work(iu), copying result to u
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, work( iu ), ldwrku, s,work( ie ), work( itauq &
                                    ),work( itaup ), work( iwork ),lwork-iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'L', m, m, work( iu ), ldwrku, u,ldu )
                          ! generate right bidiagonalizing vectors in work(iu)
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+(m-1)*nb)
                          call stdlib${ii}$_sorgbr( 'P', m, m, m, work( iu ), ldwrku,work( itaup ), &
                                    work( iwork ),lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in u
                          ! (workspace: need m*m+4*m, prefer m*m+3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of l in u and computing right
                          ! singular vectors of l in work(iu)
                          ! (workspace: need m*m+bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, m, m, 0_${ik}$, s, work( ie ),work( iu ), ldwrku, &
                                    u, ldu, dum, 1_${ik}$,work( iwork ), info )
                          ! multiply right singular vectors of l in work(iu) by
                          ! q in vt, storing result in a
                          ! (workspace: need m*m)
                          call stdlib${ii}$_sgemm( 'N', 'N', m, n, m, one, work( iu ),ldwrku, vt, ldvt, &
                                    zero, a, lda )
                          ! copy right singular vectors of a from a to vt
                          call stdlib${ii}$_slacpy( 'F', m, n, a, lda, vt, ldvt )
                       else
                          ! insufficient workspace for a fast algorithm
                          itau = 1_${ik}$
                          iwork = itau + m
                          ! compute a=l*q, copying result to vt
                          ! (workspace: need 2*m, prefer m+m*nb)
                          call stdlib${ii}$_sgelqf( m, n, a, lda, work( itau ),work( iwork ), lwork-&
                                    iwork+1, ierr )
                          call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                          ! generate q in vt
                          ! (workspace: need m+n, prefer m+n*nb)
                          call stdlib${ii}$_sorglq( n, n, m, vt, ldvt, work( itau ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          ! copy l to u, zeroing out above it
                          call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu )
                          if (m>1_${ik}$) call stdlib${ii}$_slaset( 'U', m-1, m-1, zero, zero, u( 1_${ik}$, 2_${ik}$ ),ldu )
                          ie = itau
                          itauq = ie + m
                          itaup = itauq + m
                          iwork = itaup + m
                          ! bidiagonalize l in u
                          ! (workspace: need 4*m, prefer 3*m+2*m*nb)
                          call stdlib${ii}$_sgebrd( m, m, u, ldu, s, work( ie ),work( itauq ), work( &
                                    itaup ),work( iwork ), lwork-iwork+1, ierr )
                          ! multiply right bidiagonalizing vectors in u by q
                          ! in vt
                          ! (workspace: need 3*m+n, prefer 3*m+n*nb)
                          call stdlib${ii}$_sormbr( 'P', 'L', 'T', m, n, m, u, ldu,work( itaup ), vt, &
                                    ldvt,work( iwork ), lwork-iwork+1, ierr )
                          ! generate left bidiagonalizing vectors in u
                          ! (workspace: need 4*m, prefer 3*m+m*nb)
                          call stdlib${ii}$_sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),work( iwork ), &
                                    lwork-iwork+1, ierr )
                          iwork = ie + m
                          ! perform bidiagonal qr iteration, computing left
                          ! singular vectors of a in u and computing right
                          ! singular vectors of a in vt
                          ! (workspace: need bdspac)
                          call stdlib${ii}$_sbdsqr( 'U', m, n, m, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, &
                                    dum, 1_${ik}$, work( iwork ),info )
                       end if
                    end if
                 end if
              else
                 ! n < mnthr
                 ! path 10t(n greater than m, but not much larger)
                 ! reduce to bidiagonal form without lq decomposition
                 ie = 1_${ik}$
                 itauq = ie + m
                 itaup = itauq + m
                 iwork = itaup + m
                 ! bidiagonalize a
                 ! (workspace: need 3*m+n, prefer 3*m+(m+n)*nb)
                 call stdlib${ii}$_sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),work( itaup ), &
                           work( iwork ), lwork-iwork+1,ierr )
                 if( wntuas ) then
                    ! if left singular vectors desired in u, copy result to u
                    ! and generate left bidiagonalizing vectors in u
                    ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb)
                    call stdlib${ii}$_slacpy( 'L', m, m, a, lda, u, ldu )
                    call stdlib${ii}$_sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),work( iwork ), lwork-&
                              iwork+1, ierr )
                 end if
                 if( wntvas ) then
                    ! if right singular vectors desired in vt, copy result to
                    ! vt and generate right bidiagonalizing vectors in vt
                    ! (workspace: need 3*m+nrvt, prefer 3*m+nrvt*nb)
                    call stdlib${ii}$_slacpy( 'U', m, n, a, lda, vt, ldvt )
                    if( wntva )nrvt = n
                    if( wntvs )nrvt = m
                    call stdlib${ii}$_sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),work( iwork ), &
                              lwork-iwork+1, ierr )
                 end if
                 if( wntuo ) then
                    ! if left singular vectors desired in a, generate left
                    ! bidiagonalizing vectors in a
                    ! (workspace: need 4*m-1, prefer 3*m+(m-1)*nb)
                    call stdlib${ii}$_sorgbr( 'Q', m, m, n, a, lda, work( itauq ),work( iwork ), lwork-&
                              iwork+1, ierr )
                 end if
                 if( wntvo ) then
                    ! if right singular vectors desired in a, generate right
                    ! bidiagonalizing vectors in a
                    ! (workspace: need 4*m, prefer 3*m+m*nb)
                    call stdlib${ii}$_sorgbr( 'P', m, n, m, a, lda, work( itaup ),work( iwork ), lwork-&
                              iwork+1, ierr )
                 end if
                 iwork = ie + m
                 if( wntuas .or. wntuo )nru = m
                 if( wntun )nru = 0_${ik}$
                 if( wntvas .or. wntvo )ncvt = n
                 if( wntvn )ncvt = 0_${ik}$
                 if( ( .not.wntuo ) .and. ( .not.wntvo ) ) then
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in u and computing right singular
                    ! vectors in vt
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, u, ldu, dum,&
                               1_${ik}$, work( iwork ), info )
                 else if( ( .not.wntuo ) .and. wntvo ) then
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in u and computing right singular
                    ! vectors in a
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), a, lda,u, ldu, dum, &
                              1_${ik}$, work( iwork ), info )
                 else
                    ! perform bidiagonal qr iteration, if desired, computing
                    ! left singular vectors in a and computing right singular
                    ! vectors in vt
                    ! (workspace: need bdspac)
                    call stdlib${ii}$_sbdsqr( 'L', m, ncvt, nru, 0_${ik}$, s, work( ie ), vt,ldvt, a, lda, dum,&
                               1_${ik}$, work( iwork ), info )
                 end if
              end if
           end if
           ! if stdlib${ii}$_sbdsqr failed to converge, copy unconverged superdiagonals
           ! to work( 2:minmn )
           if( info/=0_${ik}$ ) then
              if( ie>2_${ik}$ ) then
                 do i = 1, minmn - 1
                    work( i+1 ) = work( i+ie-1 )
                 end do
              end if
              if( ie<2_${ik}$ ) then
                 do i = minmn - 1, 1, -1
                    work( i+1 ) = work( i+ie-1 )
                 end do
              end if
           end if
           ! undo scaling if necessary
           if( iscl==1_${ik}$ ) then
              if( anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn, 1_${ik}$, s, minmn,&
                        ierr )
              if( info/=0_${ik}$ .and. anrm>bignum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, bignum, anrm, minmn-1,&
                         1_${ik}$, work( 2_${ik}$ ),minmn, ierr )
              if( anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn, 1_${ik}$, s, minmn,&
                        ierr )
              if( info/=0_${ik}$ .and. anrm<smlnum )call stdlib${ii}$_slascl( 'G', 0_${ik}$, 0_${ik}$, smlnum, anrm, minmn-1,&
                         1_${ik}$, work( 2_${ik}$ ),minmn, ierr )
           end if
           ! return optimal workspace in work(1)
           work( 1_${ik}$ ) = maxwrk
           return
     end subroutine stdlib${ii}$_sgesvd

     module subroutine stdlib${ii}$_dgesvd( jobu, jobvt, m, n, a, lda, s, u, ldu,vt, ldvt, work, lwork, info )
     !! DGESVD computes the singular value decomposition (SVD) of a real
     !! M-by-N matrix A, optionally computing the left and/or right singular
     !! vectors. The SVD is written
     !! A = U * SIGMA * transpose(V)
     !! where SIGMA is an M-by-N matrix which is zero except for its
     !! min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
     !! V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
     !! are the singular values of A; they are real and non-negative, and
     !! are returned in descending order.  The first min(m,n) columns of
     !! U and V are the left and right singular vectors of A.
     !! Note that the routine returns V**T, not V.
               
        ! -- lapack driver routine --
        ! -- lapack is a software package provided by univ. of tennessee,    --
        ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..--
           use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone
           ! Scalar Arguments 
           character, intent(in) :: jobu, jobvt
           integer(${ik}$), intent(out) :: info
           integer(${ik}$), intent(in) :: lda, ldu, ldvt, lwork, m, n
           ! Array Arguments 
           real(dp), intent(inout) :: a(lda,*)
           real(dp), intent(out) :: s(*), u(ldu,*), vt(ldvt,*), work(*)
        ! =====================================================================
           
           ! Local Scalars 
           logical(lk) :: lquery, wntua, wntuas, wntun, wntuo, wntus, wntva, wntvas, wntvn, wntvo,&
                      wntvs
           integer(${ik}$) :: bdspac, blk, chunk, i, ie, ierr, ir, iscl, itau, itaup, itauq, iu, &
           iwork, ldwrkr, ldwrku, maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru, nrvt, &
                     wrkbl
           integer(${ik}$) :: lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m, lwork_dgebrd, &
                     lwork_dorgbr_p, lwork_dorgbr_q, lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
           real(dp) :: anrm, bignum, eps, smlnum
           ! Local Arrays 
           real(dp) :: dum(1_${ik}$)
           ! Intrinsic Functions 
           ! Executable Statements 
           ! test the input arguments
           info = 0_${ik}$
           minmn = min( m, n )
           wntua = stdlib_lsame( jobu, 'A' )
           wntus = stdlib_lsame( jobu, 'S' )
           wntuas = wntua .or. wntus
           wntuo = stdlib_lsame( jobu, 'O' )
           wntun = stdlib_lsame( jobu, 'N' )
           wntva = stdlib_lsame( jobvt, 'A' )
           wntvs = stdlib_lsame( jobvt, 'S' )
           wntvas = wntva .or. wntvs
           wntvo = stdlib_lsame( jobvt, 'O' )
           wntvn = stdlib_lsame( jobvt, 'N' )
           lquery = ( lwork==-1_${ik}$ )
           if( .not.( wntua .or. wntus .or. wntuo .or. wntun ) ) then
              info = -1_${ik}$
           else if( .not.( wntva .or. wntvs .or. wntvo .or. wntvn ) .or.( wntvo .and. wntuo ) ) &
                     then
              info = -2_${ik}$
           else if( m<0_${ik}$ ) then
              info = -3_${ik}$
           else if( n<0_${ik}$ ) then
              info = -4_${ik}$
           else if( lda<max( 1_${ik}$, m ) ) then
              info = -6_${ik}$
           else if( ldu<1_${ik}$ .or. ( wntuas .and. ldu<m ) ) then
              info = -9_${ik}$
           else if( ldvt<1_${ik}$ .or. ( wntva .and. ldvt<n ) .or.( wntvs .and. ldvt<minmn ) ) &
                     then
              info = -11_${ik}$
           end if
           ! compute workspace
            ! (note: comments in the code beginning "workspace:" describe the
             ! minimal amount of workspace needed at that point in the code,
             ! as well as the preferred amount for good performance.
             ! nb refers to the optimal block size for the immediately
             ! following subroutine, as returned by stdlib${ii}$_ilaenv.)
           if( info==0_${ik}$ ) then
              minwrk = 1_${ik}$
              maxwrk = 1_${ik}$
              if( m>=n .and. minmn>0_${ik}$ ) then
                 ! compute space needed for stdlib${ii}$_dbdsqr
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ )
                 bdspac = 5_${ik}$*n
                 ! compute space needed for stdlib${ii}$_dgeqrf
                 call stdlib${ii}$_dgeqrf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dgeqrf = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dorgqr
                 call stdlib${ii}$_dorgqr( m, n, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorgqr_n = int( dum(1_${ik}$),KIND=${ik}$)
                 call stdlib${ii}$_dorgqr( m, m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorgqr_m = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dgebrd
                 call stdlib${ii}$_dgebrd( n, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                           
                 lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dorgbr p
                 call stdlib${ii}$_dorgbr( 'P', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorgbr_p = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dorgbr q
                 call stdlib${ii}$_dorgbr( 'Q', n, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                 if( m>=mnthr ) then
                    if( wntun ) then
                       ! path 1 (m much larger than n, jobu='n')
                       maxwrk = n + lwork_dgeqrf
                       maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dgebrd )
                       if( wntvo .or. wntvas )maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p )
                       maxwrk = max( maxwrk, bdspac )
                       minwrk = max( 4_${ik}$*n, bdspac )
                    else if( wntuo .and. wntvn ) then
                       ! path 2 (m much larger than n, jobu='o', jobvt='n')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( n*n + wrkbl, n*n + m*n + n )
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntuo .and. wntvas ) then
                       ! path 3 (m much larger than n, jobu='o', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = max( n*n + wrkbl, n*n + m*n + n )
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntus .and. wntvn ) then
                       ! path 4 (m much larger than n, jobu='s', jobvt='n')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntus .and. wntvo ) then
                       ! path 5 (m much larger than n, jobu='s', jobvt='o')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntus .and. wntvas ) then
                       ! path 6 (m much larger than n, jobu='s', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_n )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntua .and. wntvn ) then
                       ! path 7 (m much larger than n, jobu='a', jobvt='n')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntua .and. wntvo ) then
                       ! path 8 (m much larger than n, jobu='a', jobvt='o')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = 2_${ik}$*n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    else if( wntua .and. wntvas ) then
                       ! path 9 (m much larger than n, jobu='a', jobvt='s' or
                       ! 'a')
                       wrkbl = n + lwork_dgeqrf
                       wrkbl = max( wrkbl, n + lwork_dorgqr_m )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dgebrd )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_q )
                       wrkbl = max( wrkbl, 3_${ik}$*n + lwork_dorgbr_p )
                       wrkbl = max( wrkbl, bdspac )
                       maxwrk = n*n + wrkbl
                       minwrk = max( 3_${ik}$*n + m, bdspac )
                    end if
                 else
                    ! path 10 (m at least n, but not much larger)
                    call stdlib${ii}$_dgebrd( m, n, a, lda, s, dum(1_${ik}$), dum(1_${ik}$),dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                              
                    lwork_dgebrd = int( dum(1_${ik}$),KIND=${ik}$)
                    maxwrk = 3_${ik}$*n + lwork_dgebrd
                    if( wntus .or. wntuo ) then
                       call stdlib${ii}$_dorgbr( 'Q', m, n, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                       lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                       maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q )
                    end if
                    if( wntua ) then
                       call stdlib${ii}$_dorgbr( 'Q', m, m, n, a, lda, dum(1_${ik}$),dum(1_${ik}$), -1_${ik}$, ierr )
                       lwork_dorgbr_q = int( dum(1_${ik}$),KIND=${ik}$)
                       maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_q )
                    end if
                    if( .not.wntvn ) then
                      maxwrk = max( maxwrk, 3_${ik}$*n + lwork_dorgbr_p )
                    end if
                    maxwrk = max( maxwrk, bdspac )
                    minwrk = max( 3_${ik}$*n + m, bdspac )
                 end if
              else if( minmn>0_${ik}$ ) then
                 ! compute space needed for stdlib${ii}$_dbdsqr
                 mnthr = stdlib${ii}$_ilaenv( 6_${ik}$, 'DGESVD', jobu // jobvt, m, n, 0_${ik}$, 0_${ik}$ )
                 bdspac = 5_${ik}$*m
                 ! compute space needed for stdlib${ii}$_dgelqf
                 call stdlib${ii}$_dgelqf( m, n, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dgelqf = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dorglq
                 call stdlib${ii}$_dorglq( n, n, m, dum(1_${ik}$), n, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorglq_n = int( dum(1_${ik}$),KIND=${ik}$)
                 call stdlib${ii}$_dorglq( m, n, m, a, lda, dum(1_${ik}$), dum(1_${ik}$), -1_${ik}$, ierr )
                 lwork_dorglq_m = int( dum(1_${ik}$),KIND=${ik}$)
                 ! compute space needed for stdlib${ii}$_dgebrd
                 call stdlib${ii}$_dgebrd( m, m, a, lda, s, dum(1