#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_cosine_sine implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES pure module subroutine stdlib${ii}$_sbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! SBBCSD computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See SORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(sp), intent(inout) :: phi(*), theta(*) real(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(sp), parameter :: hundred = 100.0_sp real(sp), parameter :: meighth = -0.125_sp real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'EPSILON' ) unfl = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_slartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_slartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_slartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_slartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_slartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_slartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_slartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_slartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_slartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_slartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_slartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_slasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_slasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_slasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_slasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_sscal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_sscal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_sscal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_sscal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_sscal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_sscal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_sscal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_sscal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_sswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_sswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_sswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_sswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_sswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_sswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_sswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_sswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_sbbcsd pure module subroutine stdlib${ii}$_dbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! DBBCSD computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See DORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(dp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(dp), intent(inout) :: phi(*), theta(*) real(dp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(dp), parameter :: hundred = 100.0_dp real(dp), parameter :: meighth = -0.125_dp real(dp), parameter :: piover2 = 1.57079632679489661923132169163975144210_dp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(dp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'EPSILON' ) unfl = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_dlartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_dlartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_dlartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_dlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_dlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_dlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_dlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_dlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_dlartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_dlasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_dlasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_dscal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_dscal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_dscal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_dscal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_dscal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_dscal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_dscal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_dscal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_dswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_dswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_dswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_dswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_dswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_dswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_dswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_dswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_dbbcsd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$bbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! DBBCSD: computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See DORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(${rk}$), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(${rk}$), intent(inout) :: phi(*), theta(*) real(${rk}$), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(${rk}$), parameter :: hundred = 100.0_${rk}$ real(${rk}$), parameter :: meighth = -0.125_${rk}$ real(${rk}$), parameter :: piover2 = 1.57079632679489661923132169163975144210_${rk}$ ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(${rk}$) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) unfl = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_${ri}$las2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_${ri}$las2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_${ri}$lartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_${ri}$lartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_${ri}$lartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_${ri}$lartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ri}$scal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ri}$scal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ri}$scal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ri}$scal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_${ri}$swap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_${ri}$swap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_${ri}$swap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_${ri}$swap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_${ri}$swap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_${ri}$swap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_${ri}$swap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_${ri}$swap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_${ri}$bbcsd #:endif #:endfor pure module subroutine stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & lrwork, info ) !! CBBCSD computes the CS decomposition of a unitary matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See CUNCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The unitary matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q ! Array Arguments real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), rwork(*) real(sp), intent(inout) :: phi(*), theta(*) complex(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(sp), parameter :: hundred = 100.0_sp real(sp), parameter :: meighth = -0.125_sp real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lrwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lrworkmin = 1_${ik}$ rwork(1_${ik}$) = lrworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lrworkopt = iv2tsn + q - 1_${ik}$ lrworkmin = lrworkopt rwork(1_${ik}$) = lrworkopt if( lrwork < lrworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'EPSILON' ) unfl = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) else call stdlib${ii}$_slartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) end if temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& 1_${ik}$), r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,rwork(iu1cs+imin-1), & rwork(iu1sn+imin-1) ) else call stdlib${ii}$_slartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,rwork(iu2cs+imin-1), & rwork(iu2sn+imin-1) ) else call stdlib${ii}$_slartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& imin-1) ) end if rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) end if temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) end if temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_slartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) else call stdlib${ii}$_slartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) end if rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) else call stdlib${ii}$_slartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) end if temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_slartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& ) else call stdlib${ii}$_slartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_slartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& ) else call stdlib${ii}$_slartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) end if rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = rwork(iu1sn+i-1)*b11e(i+1) b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) end if temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = rwork(iu2sn+i-1)*b21e(i+1) b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) end if temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = rwork(iu1sn+i-1)*b12d(i+1) b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = rwork(iu2sn+i-1)*b22d(i+1) b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_slartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) end if temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_clasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_clasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_clasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_clasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_clasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_clasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_clasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_clasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_cscal( q, cnegone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_cscal( q, cnegone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_cscal( p, cnegone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_cscal( p, cnegone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_cscal( m-p, cnegone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_cscal( m-p, cnegone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_cscal( m-q, cnegone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_cscal( m-q, cnegone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_cswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_cswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_cswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_cswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_cswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_cswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_cswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_cswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_cbbcsd pure module subroutine stdlib${ii}$_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! ZBBCSD computes the CS decomposition of a unitary matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See ZUNCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The unitary matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & lrwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q ! Array Arguments real(dp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), rwork(*) real(dp), intent(inout) :: phi(*), theta(*) complex(dp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(dp), parameter :: hundred = 100.0_dp real(dp), parameter :: meighth = -0.125_dp real(dp), parameter :: piover2 = 1.57079632679489661923132169163975144210_dp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini real(dp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lrwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lrworkmin = 1_${ik}$ rwork(1_${ik}$) = lrworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lrworkopt = iv2tsn + q - 1_${ik}$ lrworkmin = lrworkopt rwork(1_${ik}$) = lrworkopt if( lrwork < lrworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'EPSILON' ) unfl = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) else call stdlib${ii}$_dlartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) end if temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& 1_${ik}$), r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,rwork(iu1cs+imin-1), & rwork(iu1sn+imin-1) ) else call stdlib${ii}$_dlartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,rwork(iu2cs+imin-1), & rwork(iu2sn+imin-1) ) else call stdlib${ii}$_dlartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& imin-1) ) end if rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) end if temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) end if temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_dlartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) else call stdlib${ii}$_dlartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) end if rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) else call stdlib${ii}$_dlartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) end if temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& ) else call stdlib${ii}$_dlartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& ) else call stdlib${ii}$_dlartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) end if rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = rwork(iu1sn+i-1)*b11e(i+1) b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) end if temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = rwork(iu2sn+i-1)*b21e(i+1) b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) end if temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = rwork(iu1sn+i-1)*b12d(i+1) b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = rwork(iu2sn+i-1)*b22d(i+1) b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_dlartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) end if temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_zlasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_zlasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_zlasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_zlasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_zlasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_zlasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_zlasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_zlasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_zscal( q, cnegone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_zscal( q, cnegone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_zscal( p, cnegone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_zscal( p, cnegone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_zscal( m-p, cnegone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_zscal( m-p, cnegone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_zscal( m-q, cnegone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_zscal( m-q, cnegone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_zswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_zswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_zswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_zswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_zswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_zswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_zswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_zswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_zbbcsd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$bbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! ZBBCSD: computes the CS decomposition of a unitary matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See ZUNCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The unitary matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & lrwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q ! Array Arguments real(${ck}$), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), rwork(*) real(${ck}$), intent(inout) :: phi(*), theta(*) complex(${ck}$), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(${ck}$), parameter :: hundred = 100.0_${ck}$ real(${ck}$), parameter :: meighth = -0.125_${ck}$ real(${ck}$), parameter :: piover2 = 1.57079632679489661923132169163975144210_${ck}$ ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini real(${ck}$) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lrwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lrworkmin = 1_${ik}$ rwork(1_${ik}$) = lrworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lrworkopt = iv2tsn + q - 1_${ik}$ lrworkmin = lrworkopt rwork(1_${ik}$) = lrworkopt if( lrwork < lrworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_${c2ri(ci)}$lamch( 'EPSILON' ) unfl = stdlib${ii}$_${c2ri(ci)}$lamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_${c2ri(ci)}$las2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_${c2ri(ci)}$las2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) end if temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& 1_${ik}$), r ) else if( mu <= nu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,rwork(iu1cs+imin-1), & rwork(iu1sn+imin-1) ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,rwork(iu2cs+imin-1), & rwork(iu2sn+imin-1) ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& imin-1) ) end if rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) end if temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) end if temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) end if rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( nu < mu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) end if temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & r ) else if( nu < mu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) end if rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = rwork(iu1sn+i-1)*b11e(i+1) b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) end if temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = rwork(iu2sn+i-1)*b21e(i+1) b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) end if temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = rwork(iu1sn+i-1)*b12d(i+1) b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = rwork(iu2sn+i-1)*b22d(i+1) b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( y2, y1, rwork(iv2tsn+imax-1-1),rwork(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b12bulge, b12d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${c2ri(ci)}$lartgp( b22bulge, b22d(imax-1),rwork(iv2tsn+imax-1-1),rwork(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_${c2ri(ci)}$lartgs( b12e(imax-1), b12d(imax), nu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_${c2ri(ci)}$lartgs( b22e(imax-1), b22d(imax), mu,rwork(iv2tcs+imax-1-1),rwork(& iv2tsn+imax-1-1) ) end if temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +rwork(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -rwork(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +rwork(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -rwork(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ci}$lasr( 'R', 'V', 'F', p, imax-imin+1,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_${ci}$lasr( 'L', 'V', 'F', imax-imin+1, p,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ci}$lasr( 'R', 'V', 'F', m-p, imax-imin+1,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_${ci}$lasr( 'L', 'V', 'F', imax-imin+1, m-p,rwork(iu2cs+imin-1), rwork(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ci}$lasr( 'L', 'V', 'F', imax-imin+1, q,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ci}$lasr( 'R', 'V', 'F', q, imax-imin+1,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ci}$lasr( 'L', 'V', 'F', imax-imin+1, m-q,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ci}$lasr( 'R', 'V', 'F', m-q, imax-imin+1,rwork(iv2tcs+imin-1), & rwork(iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ci}$scal( q, cnegone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ci}$scal( q, cnegone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ci}$scal( p, cnegone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ci}$scal( p, cnegone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ci}$scal( m-p, cnegone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ci}$scal( m-p, cnegone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ci}$scal( m-q, cnegone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ci}$scal( m-q, cnegone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_${ci}$swap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_${ci}$swap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_${ci}$swap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_${ci}$swap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_${ci}$swap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_${ci}$swap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_${ci}$swap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_${ci}$swap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_${ci}$bbcsd #:endif #:endfor recursive module subroutine stdlib${ii}$_cuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & !! CUNCSD computes the CS decomposition of an M-by-M partitioned !! unitary matrix X: !! [ I 0 0 | 0 0 0 ] !! [ 0 C 0 | 0 -S 0 ] !! [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H !! X = [-----------] = [---------] [---------------------] [---------] . !! [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] !! [ 0 S 0 | 0 C 0 ] !! [ 0 0 I | 0 0 0 ] !! X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, !! (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are !! R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in !! which R = MIN(P,M-P,Q,M-Q). ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & work, lwork, rwork, lrwork,iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & lrwork, lwork, m, p, q ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(sp), intent(out) :: theta(*) real(sp), intent(out) :: rwork(*) complex(sp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! =================================================================== ! Local Scalars character :: transt, signst integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & lworkopt, p1, q1 logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t integer(${ik}$) :: lrworkmin, lrworkopt logical(lk) :: lrquery ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) defaultsigns = .not. stdlib_lsame( signs, 'O' ) lquery = lwork == -1_${ik}$ lrquery = lrwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -7_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -8_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -9_${ik}$ else if ( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -11_${ik}$ else if (.not. colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if (colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if (.not. colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ else if (colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -15_${ik}$ else if (.not. colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -15_${ik}$ else if (colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -17_${ik}$ else if (.not. colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -17_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -20_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -22_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -24_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -26_${ik}$ end if ! work with transpose if convenient if( info == 0_${ik}$ .and. min( p, m-p ) < min( q, m-q ) ) then if( colmajor ) then transt = 'T' else transt = 'N' end if if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_cuncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) return end if ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if ! convenient if( info == 0_${ik}$ .and. m-q < q ) then if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_cuncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) return end if ! compute workspace if( info == 0_${ik}$ ) then ! real workspace iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, q - 1_${ik}$ ) ib11e = ib11d + max( 1_${ik}$, q ) ib12d = ib11e + max( 1_${ik}$, q - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, q ) ib21d = ib12e + max( 1_${ik}$, q - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, q ) ib22d = ib21e + max( 1_${ik}$, q - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, q ) ibbcsd = ib22e + max( 1_${ik}$, q - 1_${ik}$ ) call stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & theta, theta, rwork, -1_${ik}$, childinfo ) lbbcsdworkopt = int( rwork(1_${ik}$),KIND=${ik}$) lbbcsdworkmin = lbbcsdworkopt lrworkopt = ibbcsd + lbbcsdworkopt - 1_${ik}$ lrworkmin = ibbcsd + lbbcsdworkmin - 1_${ik}$ rwork(1_${ik}$) = lrworkopt ! complex workspace itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m - p ) itauq2 = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_cungqr( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorgqrworkopt = int( work(1_${ik}$),KIND=${ik}$) lorgqrworkmin = max( 1_${ik}$, m - q ) iorglq = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_cunglq( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorglqworkopt = int( work(1_${ik}$),KIND=${ik}$) lorglqworkmin = max( 1_${ik}$, m - q ) iorbdb = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & ldx22, theta, theta, u1, u2,v1t, v2t, work, -1_${ik}$, childinfo ) lorbdbworkopt = int( work(1_${ik}$),KIND=${ik}$) lorbdbworkmin = lorbdbworkopt lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & lorbdbworkopt ) - 1_${ik}$ lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & lorbdbworkmin ) - 1_${ik}$ work(1_${ik}$) = max(lworkopt,lworkmin) if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -22_${ik}$ else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -24_${ik}$ else lorgqrwork = lwork - iorgqr + 1_${ik}$ lorglqwork = lwork - iorglq + 1_${ik}$ lorbdbwork = lwork - iorbdb + 1_${ik}$ lbbcsdwork = lrwork - ibbcsd + 1_${ik}$ end if end if ! abort if any illegal arguments if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNCSD', -info ) return else if( lquery .or. lrquery ) then return end if ! transform to bidiagonal block form call stdlib${ii}$_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& iorbdb), lorbdbwork, childinfo ) ! accumulate householder reflectors if( colmajor ) then if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', q-1, q-1, x11(1_${ik}$,2_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_cunglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglqwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) if( m-p > q ) then call stdlib${ii}$_clacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & ldv2t ) end if if( m > q ) then call stdlib${ii}$_cunglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & lorglqwork, info ) end if end if else if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_cunglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_cunglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', q-1, q-1, x11(2_${ik}$,1_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_cungqr( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorgqr), & lorgqrwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then p1 = min( p+1, m ) q1 = min( q+1, m ) call stdlib${ii}$_clacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) if ( m > p+q ) then call stdlib${ii}$_clacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) end if call stdlib${ii}$_cungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & lorgqrwork, info ) end if end if ! compute the csd of the matrix in bidiagonal-block form call stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& lbbcsdwork, info ) ! permute rows and columns to place identity submatrices in top- ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- ! block and/or bottom-right corner of (2,1)-block and/or top-left ! corner of (2,2)-block if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do if( colmajor ) then call stdlib${ii}$_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) else call stdlib${ii}$_clapmr( .false., m-p, m-p, u2, ldu2, iwork ) end if end if if( m > 0_${ik}$ .and. wantv2t ) then do i = 1, p iwork(i) = m - p - q + i end do do i = p + 1, m - q iwork(i) = i - p end do if( .not. colmajor ) then call stdlib${ii}$_clapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) else call stdlib${ii}$_clapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) end if end if return ! end stdlib${ii}$_cuncsd end subroutine stdlib${ii}$_cuncsd recursive module subroutine stdlib${ii}$_zuncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & !! ZUNCSD computes the CS decomposition of an M-by-M partitioned !! unitary matrix X: !! [ I 0 0 | 0 0 0 ] !! [ 0 C 0 | 0 -S 0 ] !! [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H !! X = [-----------] = [---------] [---------------------] [---------] . !! [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] !! [ 0 S 0 | 0 C 0 ] !! [ 0 0 I | 0 0 0 ] !! X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, !! (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are !! R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in !! which R = MIN(P,M-P,Q,M-Q). ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & work, lwork, rwork, lrwork,iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & lrwork, lwork, m, p, q ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(dp), intent(out) :: theta(*) real(dp), intent(out) :: rwork(*) complex(dp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! =================================================================== ! Local Scalars character :: transt, signst integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & lworkopt, p1, q1 logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t integer(${ik}$) :: lrworkmin, lrworkopt logical(lk) :: lrquery ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) defaultsigns = .not. stdlib_lsame( signs, 'O' ) lquery = lwork == -1_${ik}$ lrquery = lrwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -7_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -8_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -9_${ik}$ else if ( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -11_${ik}$ else if (.not. colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if (colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if (.not. colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ else if (colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -15_${ik}$ else if (.not. colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -15_${ik}$ else if (colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -17_${ik}$ else if (.not. colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -17_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -20_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -22_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -24_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -26_${ik}$ end if ! work with transpose if convenient if( info == 0_${ik}$ .and. min( p, m-p ) < min( q, m-q ) ) then if( colmajor ) then transt = 'T' else transt = 'N' end if if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_zuncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) return end if ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if ! convenient if( info == 0_${ik}$ .and. m-q < q ) then if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_zuncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) return end if ! compute workspace if( info == 0_${ik}$ ) then ! real workspace iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, q - 1_${ik}$ ) ib11e = ib11d + max( 1_${ik}$, q ) ib12d = ib11e + max( 1_${ik}$, q - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, q ) ib21d = ib12e + max( 1_${ik}$, q - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, q ) ib22d = ib21e + max( 1_${ik}$, q - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, q ) ibbcsd = ib22e + max( 1_${ik}$, q - 1_${ik}$ ) call stdlib${ii}$_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & theta, theta, rwork, -1_${ik}$, childinfo ) lbbcsdworkopt = int( rwork(1_${ik}$),KIND=${ik}$) lbbcsdworkmin = lbbcsdworkopt lrworkopt = ibbcsd + lbbcsdworkopt - 1_${ik}$ lrworkmin = ibbcsd + lbbcsdworkmin - 1_${ik}$ rwork(1_${ik}$) = lrworkopt ! complex workspace itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m - p ) itauq2 = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_zungqr( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorgqrworkopt = int( work(1_${ik}$),KIND=${ik}$) lorgqrworkmin = max( 1_${ik}$, m - q ) iorglq = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_zunglq( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorglqworkopt = int( work(1_${ik}$),KIND=${ik}$) lorglqworkmin = max( 1_${ik}$, m - q ) iorbdb = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & ldx22, theta, theta, u1, u2,v1t, v2t, work, -1_${ik}$, childinfo ) lorbdbworkopt = int( work(1_${ik}$),KIND=${ik}$) lorbdbworkmin = lorbdbworkopt lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & lorbdbworkopt ) - 1_${ik}$ lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & lorbdbworkmin ) - 1_${ik}$ work(1_${ik}$) = max(lworkopt,lworkmin) if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -22_${ik}$ else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -24_${ik}$ else lorgqrwork = lwork - iorgqr + 1_${ik}$ lorglqwork = lwork - iorglq + 1_${ik}$ lorbdbwork = lwork - iorbdb + 1_${ik}$ lbbcsdwork = lrwork - ibbcsd + 1_${ik}$ end if end if ! abort if any illegal arguments if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNCSD', -info ) return else if( lquery .or. lrquery ) then return end if ! transform to bidiagonal block form call stdlib${ii}$_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& iorbdb), lorbdbwork, childinfo ) ! accumulate householder reflectors if( colmajor ) then if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', q-1, q-1, x11(1_${ik}$,2_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_zunglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglqwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) if( m-p > q) then call stdlib${ii}$_zlacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & ldv2t ) end if if( m > q ) then call stdlib${ii}$_zunglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & lorglqwork, info ) end if end if else if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_zunglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_zunglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', q-1, q-1, x11(2_${ik}$,1_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_zungqr( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorgqr), & lorgqrwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then p1 = min( p+1, m ) q1 = min( q+1, m ) call stdlib${ii}$_zlacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) if( m > p+q ) then call stdlib${ii}$_zlacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) end if call stdlib${ii}$_zungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & lorgqrwork, info ) end if end if ! compute the csd of the matrix in bidiagonal-block form call stdlib${ii}$_zbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& lbbcsdwork, info ) ! permute rows and columns to place identity submatrices in top- ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- ! block and/or bottom-right corner of (2,1)-block and/or top-left ! corner of (2,2)-block if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do if( colmajor ) then call stdlib${ii}$_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) else call stdlib${ii}$_zlapmr( .false., m-p, m-p, u2, ldu2, iwork ) end if end if if( m > 0_${ik}$ .and. wantv2t ) then do i = 1, p iwork(i) = m - p - q + i end do do i = p + 1, m - q iwork(i) = i - p end do if( .not. colmajor ) then call stdlib${ii}$_zlapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) else call stdlib${ii}$_zlapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) end if end if return ! end stdlib${ii}$_zuncsd end subroutine stdlib${ii}$_zuncsd #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] recursive module subroutine stdlib${ii}$_${ci}$uncsd( jobu1, jobu2, jobv1t, jobv2t, trans,signs, m, p, q, x11, & !! ZUNCSD: computes the CS decomposition of an M-by-M partitioned !! unitary matrix X: !! [ I 0 0 | 0 0 0 ] !! [ 0 C 0 | 0 -S 0 ] !! [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H !! X = [-----------] = [---------] [---------------------] [---------] . !! [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] !! [ 0 S 0 | 0 C 0 ] !! [ 0 0 I | 0 0 0 ] !! X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, !! (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are !! R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in !! which R = MIN(P,M-P,Q,M-Q). ldx11, x12,ldx12, x21, ldx21, x22, ldx22, theta,u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, & work, lwork, rwork, lrwork,iwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, ldx11, ldx12, ldx21, ldx22, & lrwork, lwork, m, p, q ! Array Arguments integer(${ik}$), intent(out) :: iwork(*) real(${ck}$), intent(out) :: theta(*) real(${ck}$), intent(out) :: rwork(*) complex(${ck}$), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! =================================================================== ! Local Scalars character :: transt, signst integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, itauq2, j, lbbcsdwork, & lbbcsdworkmin, lbbcsdworkopt, lorbdbwork, lorbdbworkmin, lorbdbworkopt, lorglqwork, & lorglqworkmin, lorglqworkopt, lorgqrwork, lorgqrworkmin, lorgqrworkopt, lworkmin, & lworkopt, p1, q1 logical(lk) :: colmajor, defaultsigns, lquery, wantu1, wantu2, wantv1t, wantv2t integer(${ik}$) :: lrworkmin, lrworkopt logical(lk) :: lrquery ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) defaultsigns = .not. stdlib_lsame( signs, 'O' ) lquery = lwork == -1_${ik}$ lrquery = lrwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -7_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -8_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -9_${ik}$ else if ( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -11_${ik}$ else if (.not. colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if (colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if (.not. colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ else if (colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -15_${ik}$ else if (.not. colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -15_${ik}$ else if (colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -17_${ik}$ else if (.not. colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -17_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -20_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -22_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -24_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -26_${ik}$ end if ! work with transpose if convenient if( info == 0_${ik}$ .and. min( p, m-p ) < min( q, m-q ) ) then if( colmajor ) then transt = 'T' else transt = 'N' end if if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_${ci}$uncsd( jobv1t, jobv2t, jobu1, jobu2, transt, signst, m,q, p, x11, & ldx11, x21, ldx21, x12, ldx12, x22,ldx22, theta, v1t, ldv1t, v2t, ldv2t, u1, ldu1,& u2, ldu2, work, lwork, rwork, lrwork, iwork,info ) return end if ! work with permutation [ 0 i; i 0 ] * x * [ 0 i; i 0 ] if ! convenient if( info == 0_${ik}$ .and. m-q < q ) then if( defaultsigns ) then signst = 'O' else signst = 'D' end if call stdlib${ii}$_${ci}$uncsd( jobu2, jobu1, jobv2t, jobv1t, trans, signst, m,m-p, m-q, x22, & ldx22, x21, ldx21, x12, ldx12, x11,ldx11, theta, u2, ldu2, u1, ldu1, v2t, ldv2t, & v1t,ldv1t, work, lwork, rwork, lrwork, iwork, info ) return end if ! compute workspace if( info == 0_${ik}$ ) then ! real workspace iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, q - 1_${ik}$ ) ib11e = ib11d + max( 1_${ik}$, q ) ib12d = ib11e + max( 1_${ik}$, q - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, q ) ib21d = ib12e + max( 1_${ik}$, q - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, q ) ib22d = ib21e + max( 1_${ik}$, q - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, q ) ibbcsd = ib22e + max( 1_${ik}$, q - 1_${ik}$ ) call stdlib${ii}$_${ci}$bbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, theta, u1, & ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, theta, theta, theta, theta, theta,theta, & theta, theta, rwork, -1_${ik}$, childinfo ) lbbcsdworkopt = int( rwork(1_${ik}$),KIND=${ik}$) lbbcsdworkmin = lbbcsdworkopt lrworkopt = ibbcsd + lbbcsdworkopt - 1_${ik}$ lrworkmin = ibbcsd + lbbcsdworkmin - 1_${ik}$ rwork(1_${ik}$) = lrworkopt ! complex workspace itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m - p ) itauq2 = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_${ci}$ungqr( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorgqrworkopt = int( work(1_${ik}$),KIND=${ik}$) lorgqrworkmin = max( 1_${ik}$, m - q ) iorglq = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_${ci}$unglq( m-q, m-q, m-q, u1, max(1_${ik}$,m-q), u1, work, -1_${ik}$,childinfo ) lorglqworkopt = int( work(1_${ik}$),KIND=${ik}$) lorglqworkmin = max( 1_${ik}$, m - q ) iorbdb = itauq2 + max( 1_${ik}$, m - q ) call stdlib${ii}$_${ci}$unbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & ldx22, theta, theta, u1, u2,v1t, v2t, work, -1_${ik}$, childinfo ) lorbdbworkopt = int( work(1_${ik}$),KIND=${ik}$) lorbdbworkmin = lorbdbworkopt lworkopt = max( iorgqr + lorgqrworkopt, iorglq + lorglqworkopt,iorbdb + & lorbdbworkopt ) - 1_${ik}$ lworkmin = max( iorgqr + lorgqrworkmin, iorglq + lorglqworkmin,iorbdb + & lorbdbworkmin ) - 1_${ik}$ work(1_${ik}$) = max(lworkopt,lworkmin) if( lwork < lworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -22_${ik}$ else if( lrwork < lrworkmin.and. .not. ( lquery .or. lrquery ) ) then info = -24_${ik}$ else lorgqrwork = lwork - iorgqr + 1_${ik}$ lorglqwork = lwork - iorglq + 1_${ik}$ lorbdbwork = lwork - iorbdb + 1_${ik}$ lbbcsdwork = lrwork - ibbcsd + 1_${ik}$ end if end if ! abort if any illegal arguments if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNCSD', -info ) return else if( lquery .or. lrquery ) then return end if ! transform to bidiagonal block form call stdlib${ii}$_${ci}$unbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12, x21,ldx21, x22, & ldx22, theta, rwork(iphi), work(itaup1),work(itaup2), work(itauq1), work(itauq2),work(& iorbdb), lorbdbwork, childinfo ) ! accumulate householder reflectors if( colmajor ) then if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_${ci}$ungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqrwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_${ci}$ungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqrwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', q-1, q-1, x11(1_${ik}$,2_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_${ci}$unglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglqwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', p, m-q, x12, ldx12, v2t, ldv2t ) if( m-p > q) then call stdlib${ii}$_${ci}$lacpy( 'U', m-p-q, m-p-q, x22(q+1,p+1), ldx22,v2t(p+1,p+1), & ldv2t ) end if if( m > q ) then call stdlib${ii}$_${ci}$unglq( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorglq), & lorglqwork, info ) end if end if else if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', q, p, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_${ci}$unglq( p, p, q, u1, ldu1, work(itaup1), work(iorglq),lorglqwork, & info) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', q, m-p, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_${ci}$unglq( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorglq), lorglqwork,& info ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', q-1, q-1, x11(2_${ik}$,1_${ik}$), ldx11, v1t(2_${ik}$,2_${ik}$),ldv1t ) v1t(1_${ik}$, 1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_${ci}$ungqr( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorgqr), & lorgqrwork, info ) end if if( wantv2t .and. m-q > 0_${ik}$ ) then p1 = min( p+1, m ) q1 = min( q+1, m ) call stdlib${ii}$_${ci}$lacpy( 'L', m-q, p, x12, ldx12, v2t, ldv2t ) if( m > p+q ) then call stdlib${ii}$_${ci}$lacpy( 'L', m-p-q, m-p-q, x22(p1,q1), ldx22,v2t(p+1,p+1), ldv2t ) end if call stdlib${ii}$_${ci}$ungqr( m-q, m-q, m-q, v2t, ldv2t, work(itauq2),work(iorgqr), & lorgqrwork, info ) end if end if ! compute the csd of the matrix in bidiagonal-block form call stdlib${ii}$_${ci}$bbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q, theta,rwork(iphi), & u1, ldu1, u2, ldu2, v1t, ldv1t, v2t,ldv2t, rwork(ib11d), rwork(ib11e), rwork(ib12d),& rwork(ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),& lbbcsdwork, info ) ! permute rows and columns to place identity submatrices in top- ! left corner of (1,1)-block and/or bottom-right corner of (1,2)- ! block and/or bottom-right corner of (2,1)-block and/or top-left ! corner of (2,2)-block if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do if( colmajor ) then call stdlib${ii}$_${ci}$lapmt( .false., m-p, m-p, u2, ldu2, iwork ) else call stdlib${ii}$_${ci}$lapmr( .false., m-p, m-p, u2, ldu2, iwork ) end if end if if( m > 0_${ik}$ .and. wantv2t ) then do i = 1, p iwork(i) = m - p - q + i end do do i = p + 1, m - q iwork(i) = i - p end do if( .not. colmajor ) then call stdlib${ii}$_${ci}$lapmt( .false., m-q, m-q, v2t, ldv2t, iwork ) else call stdlib${ii}$_${ci}$lapmr( .false., m-q, m-q, v2t, ldv2t, iwork ) end if end if return ! end stdlib${ii}$_${ci}$uncsd end subroutine stdlib${ii}$_${ci}$uncsd #:endif #:endfor module subroutine stdlib${ii}$_cuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & !! CUNCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with !! orthonormal columns that has been partitioned into a 2-by-1 block !! structure: !! [ I1 0 0 ] !! [ 0 C 0 ] !! [ X11 ] [ U1 | ] [ 0 0 0 ] !! X = [-----] = [---------] [----------] V1**T . !! [ X21 ] [ | U2 ] [ 0 0 0 ] !! [ 0 S 0 ] !! [ 0 0 I2] !! X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, !! (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R !! nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which !! R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a !! K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q integer(${ik}$), intent(in) :: lrwork integer(${ik}$) :: lrworkmin, lrworkopt ! Array Arguments real(sp), intent(out) :: rwork(*) real(sp), intent(out) :: theta(*) complex(sp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & r logical(lk) :: lquery, wantu1, wantu2, wantv1t ! Local Arrays real(sp) :: dum(1_${ik}$) complex(sp) :: cdum(1_${ik}$,1_${ik}$) ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) lquery = ( lwork==-1_${ik}$ ) .or. ( lrwork==-1_${ik}$ ) if( m < 0_${ik}$ ) then info = -4_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -5_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -6_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -8_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -10_${ik}$ else if( wantu1 .and. ldu1 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if( wantu2 .and. ldu2 < max( 1_${ik}$, m - p ) ) then info = -15_${ik}$ else if( wantv1t .and. ldv1t < max( 1_${ik}$, q ) ) then info = -17_${ik}$ end if r = min( p, m-p, q, m-q ) ! compute workspace ! work layout: ! |-----------------------------------------| ! | lworkopt (1) | ! |-----------------------------------------| ! | taup1 (max(1,p)) | ! | taup2 (max(1,m-p)) | ! | tauq1 (max(1,q)) | ! |-----------------------------------------| ! | stdlib${ii}$_cunbdb work | stdlib${ii}$_cungqr work | stdlib${ii}$_cunglq work | ! | | | | ! | | | | ! | | | | ! | | | | ! |-----------------------------------------| ! rwork layout: ! |------------------| ! | lrworkopt (1) | ! |------------------| ! | phi (max(1,r-1)) | ! |------------------| ! | b11d (r) | ! | b11e (r-1) | ! | b12d (r) | ! | b12e (r-1) | ! | b21d (r) | ! | b21e (r-1) | ! | b22d (r) | ! | b22e (r-1) | ! | stdlib${ii}$_cbbcsd rwork | ! |------------------| if( info == 0_${ik}$ ) then iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, r-1 ) ib11e = ib11d + max( 1_${ik}$, r ) ib12d = ib11e + max( 1_${ik}$, r - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, r ) ib21d = ib12e + max( 1_${ik}$, r - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, r ) ib22d = ib21e + max( 1_${ik}$, r - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, r ) ibbcsd = ib22e + max( 1_${ik}$, r - 1_${ik}$ ) itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m-p ) iorbdb = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq1 + max( 1_${ik}$, q ) iorglq = itauq1 + max( 1_${ik}$, q ) lorgqrmin = 1_${ik}$ lorgqropt = 1_${ik}$ lorglqmin = 1_${ik}$ lorglqopt = 1_${ik}$ if( r == q ) then call stdlib${ii}$_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,dum, cdum, cdum, & cdum, work, -1_${ik}$,childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) endif if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_cunglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorglqmin = max( lorglqmin, q-1 ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum(1_${ik}$), u1, & ldu1, u2, ldu2, v1t, ldv1t, cdum,1_${ik}$, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == p ) then call stdlib${ii}$_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, cdum, work(1_${ik}$),-1_${ik}$, childinfo & ) lorgqrmin = max( lorgqrmin, p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_cunglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_cbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & ldv1t, cdum, 1_${ik}$, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == m-p ) then call stdlib${ii}$_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2, cdum,work(1_${ik}$), -1_${ik}$, & childinfo ) lorgqrmin = max( lorgqrmin, m-p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_cunglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_cbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& 1_${ik}$, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else call stdlib${ii}$_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, cdum, work(1_${ik}$), -1_${ik}$, childinfo) lorbdb = m + int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( p, p, m-q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_cungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_cunglq( q, q, q, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_cbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & ldu2, u1, ldu1, cdum, 1_${ik}$, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) end if lrworkmin = ibbcsd+lbbcsd-1 lrworkopt = lrworkmin rwork(1_${ik}$) = lrworkopt lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -19_${ik}$ end if if( lrwork < lrworkmin .and. .not.lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNCSD2BY1', -info ) return else if( lquery ) then return end if lorgqr = lwork-iorgqr+1 lorglq = lwork-iorglq+1 ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, ! in which r = min(p,m-p,q,m-q) if( r == q ) then ! case 1: r = q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then v1t(1_${ik}$,1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_clacpy( 'U', q-1, q-1, x21(1_${ik}$,2_${ik}$), ldx21, v1t(2_${ik}$,2_${ik}$),ldv1t ) call stdlib${ii}$_cunglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglq, childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & ldu1, u2, ldu2, v1t, ldv1t, cdum,1_${ik}$, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& ibbcsd+1, childinfo ) ! permute rows and columns to place czero submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == p ) then ! case 2: r = p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then u1(1_${ik}$,1_${ik}$) = cone do j = 2, p u1(1_${ik}$,j) = czero u1(j,1_${ik}$) = czero end do call stdlib${ii}$_clacpy( 'L', p-1, p-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$), ldu1 ) call stdlib${ii}$_cungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, work(itaup1),work(iorgqr), & lorgqr, childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_cungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) call stdlib${ii}$_cunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_cbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& ldv1t, cdum, 1_${ik}$, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & lbbcsd,childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_clapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == m-p ) then ! case 3: r = m-p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_cungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then u2(1_${ik}$,1_${ik}$) = cone do j = 2, m-p u2(1_${ik}$,j) = czero u2(j,1_${ik}$) = czero end do call stdlib${ii}$_clacpy( 'L', m-p-1, m-p-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_cungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2,work(itaup2), work(iorgqr)& , lorgqr, childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_cunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_cbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & cdum, 1_${ik}$, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > r ) then do i = 1, r iwork(i) = q - r + i end do do i = r + 1, q iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_clapmt( .false., p, q, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_clapmr( .false., q, q, v1t, ldv1t, iwork ) end if end if else ! case 4: r = m-q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & childinfo ) ! accumulate householder reflectors if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_ccopy( m-p, work(iorbdb+p), 1_${ik}$, u2, 1_${ik}$ ) end if if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_ccopy( p, work(iorbdb), 1_${ik}$, u1, 1_${ik}$ ) do j = 2, p u1(1_${ik}$,j) = czero end do call stdlib${ii}$_clacpy( 'L', p-1, m-q-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$),ldu1 ) call stdlib${ii}$_cungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then do j = 2, m-p u2(1_${ik}$,j) = czero end do call stdlib${ii}$_clacpy( 'L', m-p-1, m-q-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_cungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_clacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_clacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& 1_${ik}$), ldv1t ) call stdlib${ii}$_clacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) call stdlib${ii}$_cunglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_cbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & u2, ldu2, u1, ldu1, cdum, 1_${ik}$,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( p > r ) then do i = 1, r iwork(i) = p - r + i end do do i = r + 1, p iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_clapmt( .false., p, p, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_clapmr( .false., p, q, v1t, ldv1t, iwork ) end if end if end if return end subroutine stdlib${ii}$_cuncsd2by1 module subroutine stdlib${ii}$_zuncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & !! ZUNCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with !! orthonormal columns that has been partitioned into a 2-by-1 block !! structure: !! [ I1 0 0 ] !! [ 0 C 0 ] !! [ X11 ] [ U1 | ] [ 0 0 0 ] !! X = [-----] = [---------] [----------] V1**T . !! [ X21 ] [ | U2 ] [ 0 0 0 ] !! [ 0 S 0 ] !! [ 0 0 I2] !! X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, !! (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R !! nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which !! R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a !! K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q integer(${ik}$), intent(in) :: lrwork integer(${ik}$) :: lrworkmin, lrworkopt ! Array Arguments real(dp), intent(out) :: rwork(*) real(dp), intent(out) :: theta(*) complex(dp), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & r logical(lk) :: lquery, wantu1, wantu2, wantv1t ! Local Arrays real(dp) :: dum(1_${ik}$) complex(dp) :: cdum(1_${ik}$,1_${ik}$) ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) lquery = ( lwork==-1_${ik}$ ) .or. ( lrwork==-1_${ik}$ ) if( m < 0_${ik}$ ) then info = -4_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -5_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -6_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -8_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -10_${ik}$ else if( wantu1 .and. ldu1 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if( wantu2 .and. ldu2 < max( 1_${ik}$, m - p ) ) then info = -15_${ik}$ else if( wantv1t .and. ldv1t < max( 1_${ik}$, q ) ) then info = -17_${ik}$ end if r = min( p, m-p, q, m-q ) ! compute workspace ! work layout: ! |-----------------------------------------| ! | lworkopt (1) | ! |-----------------------------------------| ! | taup1 (max(1,p)) | ! | taup2 (max(1,m-p)) | ! | tauq1 (max(1,q)) | ! |-----------------------------------------| ! | stdlib${ii}$_zunbdb work | stdlib${ii}$_zungqr work | stdlib${ii}$_zunglq work | ! | | | | ! | | | | ! | | | | ! | | | | ! |-----------------------------------------| ! rwork layout: ! |------------------| ! | lrworkopt (1) | ! |------------------| ! | phi (max(1,r-1)) | ! |------------------| ! | b11d (r) | ! | b11e (r-1) | ! | b12d (r) | ! | b12e (r-1) | ! | b21d (r) | ! | b21e (r-1) | ! | b22d (r) | ! | b22e (r-1) | ! | stdlib${ii}$_zbbcsd rwork | ! |------------------| if( info == 0_${ik}$ ) then iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, r-1 ) ib11e = ib11d + max( 1_${ik}$, r ) ib12d = ib11e + max( 1_${ik}$, r - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, r ) ib21d = ib12e + max( 1_${ik}$, r - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, r ) ib22d = ib21e + max( 1_${ik}$, r - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, r ) ibbcsd = ib22e + max( 1_${ik}$, r - 1_${ik}$ ) itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m-p ) iorbdb = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq1 + max( 1_${ik}$, q ) iorglq = itauq1 + max( 1_${ik}$, q ) lorgqrmin = 1_${ik}$ lorgqropt = 1_${ik}$ lorglqmin = 1_${ik}$ lorglqopt = 1_${ik}$ if( r == q ) then call stdlib${ii}$_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work, -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) endif if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zunglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorglqmin = max( lorglqmin, q-1 ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_zbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum, u1, ldu1,& u2, ldu2, v1t, ldv1t, cdum, 1_${ik}$,dum, dum, dum, dum, dum, dum, dum, dum,rwork(1_${ik}$), -& 1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == p ) then call stdlib${ii}$_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, cdum, work(1_${ik}$),-1_${ik}$, childinfo & ) lorgqrmin = max( lorgqrmin, p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zunglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_zbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & ldv1t, cdum, 1_${ik}$, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == m-p ) then call stdlib${ii}$_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2, cdum,work(1_${ik}$), -1_${ik}$, & childinfo ) lorgqrmin = max( lorgqrmin, m-p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zunglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_zbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& 1_${ik}$, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else call stdlib${ii}$_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, cdum, work(1_${ik}$), -1_${ik}$, childinfo) lorbdb = m + int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( p, p, m-q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zunglq( q, q, q, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_zbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & ldu2, u1, ldu1, cdum, 1_${ik}$, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) end if lrworkmin = ibbcsd+lbbcsd-1 lrworkopt = lrworkmin rwork(1_${ik}$) = lrworkopt lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -19_${ik}$ end if if( lrwork < lrworkmin .and. .not.lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNCSD2BY1', -info ) return else if( lquery ) then return end if lorgqr = lwork-iorgqr+1 lorglq = lwork-iorglq+1 ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, ! in which r = min(p,m-p,q,m-q) if( r == q ) then ! case 1: r = q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then v1t(1_${ik}$,1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_zlacpy( 'U', q-1, q-1, x21(1_${ik}$,2_${ik}$), ldx21, v1t(2_${ik}$,2_${ik}$),ldv1t ) call stdlib${ii}$_zunglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglq, childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_zbbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & ldu1, u2, ldu2, v1t, ldv1t, cdum,1_${ik}$, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& ibbcsd+1, childinfo ) ! permute rows and columns to place czero submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == p ) then ! case 2: r = p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then u1(1_${ik}$,1_${ik}$) = cone do j = 2, p u1(1_${ik}$,j) = czero u1(j,1_${ik}$) = czero end do call stdlib${ii}$_zlacpy( 'L', p-1, p-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$), ldu1 ) call stdlib${ii}$_zungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, work(itaup1),work(iorgqr), & lorgqr, childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_zungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) call stdlib${ii}$_zunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_zbbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& ldv1t, cdum, 1_${ik}$, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & lbbcsd,childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_zlapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == m-p ) then ! case 3: r = m-p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_zungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then u2(1_${ik}$,1_${ik}$) = cone do j = 2, m-p u2(1_${ik}$,j) = czero u2(j,1_${ik}$) = czero end do call stdlib${ii}$_zlacpy( 'L', m-p-1, m-p-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_zungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2,work(itaup2), work(iorgqr)& , lorgqr, childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_zunglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_zbbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & cdum, 1_${ik}$, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > r ) then do i = 1, r iwork(i) = q - r + i end do do i = r + 1, q iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_zlapmt( .false., p, q, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_zlapmr( .false., q, q, v1t, ldv1t, iwork ) end if end if else ! case 4: r = m-q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & childinfo ) ! accumulate householder reflectors if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_zcopy( m-p, work(iorbdb+p), 1_${ik}$, u2, 1_${ik}$ ) end if if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_zcopy( p, work(iorbdb), 1_${ik}$, u1, 1_${ik}$ ) do j = 2, p u1(1_${ik}$,j) = czero end do call stdlib${ii}$_zlacpy( 'L', p-1, m-q-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$),ldu1 ) call stdlib${ii}$_zungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then do j = 2, m-p u2(1_${ik}$,j) = czero end do call stdlib${ii}$_zlacpy( 'L', m-p-1, m-q-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_zungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_zlacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_zlacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& 1_${ik}$), ldv1t ) call stdlib${ii}$_zlacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) call stdlib${ii}$_zunglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_zbbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & u2, ldu2, u1, ldu1, cdum, 1_${ik}$,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( p > r ) then do i = 1, r iwork(i) = p - r + i end do do i = r + 1, p iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_zlapmt( .false., p, p, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_zlapmr( .false., p, q, v1t, ldv1t, iwork ) end if end if end if return end subroutine stdlib${ii}$_zuncsd2by1 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$uncsd2by1( jobu1, jobu2, jobv1t, m, p, q, x11, ldx11,x21, ldx21, theta, & !! ZUNCSD2BY1: computes the CS decomposition of an M-by-Q matrix X with !! orthonormal columns that has been partitioned into a 2-by-1 block !! structure: !! [ I1 0 0 ] !! [ 0 C 0 ] !! [ X11 ] [ U1 | ] [ 0 0 0 ] !! X = [-----] = [---------] [----------] V1**T . !! [ X21 ] [ | U2 ] [ 0 0 0 ] !! [ 0 S 0 ] !! [ 0 0 I2] !! X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, !! (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R !! nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which !! R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a !! K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). u1, ldu1, u2, ldu2, v1t,ldv1t, work, lwork, rwork, lrwork, iwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, lwork, ldx11, ldx21, m, p, q integer(${ik}$), intent(in) :: lrwork integer(${ik}$) :: lrworkmin, lrworkopt ! Array Arguments real(${ck}$), intent(out) :: rwork(*) real(${ck}$), intent(out) :: theta(*) complex(${ck}$), intent(out) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x21(ldx21,*) integer(${ik}$), intent(out) :: iwork(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, ib11d, ib11e, ib12d, ib12e, ib21d, ib21e, ib22d, ib22e, & ibbcsd, iorbdb, iorglq, iorgqr, iphi, itaup1, itaup2, itauq1, j, lbbcsd, lorbdb, & lorglq, lorglqmin, lorglqopt, lorgqr, lorgqrmin, lorgqropt, lworkmin, lworkopt, & r logical(lk) :: lquery, wantu1, wantu2, wantv1t ! Local Arrays real(${ck}$) :: dum(1_${ik}$) complex(${ck}$) :: cdum(1_${ik}$,1_${ik}$) ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) lquery = ( lwork==-1_${ik}$ ) .or. ( lrwork==-1_${ik}$ ) if( m < 0_${ik}$ ) then info = -4_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -5_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -6_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -8_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -10_${ik}$ else if( wantu1 .and. ldu1 < max( 1_${ik}$, p ) ) then info = -13_${ik}$ else if( wantu2 .and. ldu2 < max( 1_${ik}$, m - p ) ) then info = -15_${ik}$ else if( wantv1t .and. ldv1t < max( 1_${ik}$, q ) ) then info = -17_${ik}$ end if r = min( p, m-p, q, m-q ) ! compute workspace ! work layout: ! |-----------------------------------------| ! | lworkopt (1) | ! |-----------------------------------------| ! | taup1 (max(1,p)) | ! | taup2 (max(1,m-p)) | ! | tauq1 (max(1,q)) | ! |-----------------------------------------| ! | stdlib${ii}$_${ci}$unbdb work | stdlib${ii}$_${ci}$ungqr work | stdlib${ii}$_${ci}$unglq work | ! | | | | ! | | | | ! | | | | ! | | | | ! |-----------------------------------------| ! rwork layout: ! |------------------| ! | lrworkopt (1) | ! |------------------| ! | phi (max(1,r-1)) | ! |------------------| ! | b11d (r) | ! | b11e (r-1) | ! | b12d (r) | ! | b12e (r-1) | ! | b21d (r) | ! | b21e (r-1) | ! | b22d (r) | ! | b22e (r-1) | ! | stdlib${ii}$_${ci}$bbcsd rwork | ! |------------------| if( info == 0_${ik}$ ) then iphi = 2_${ik}$ ib11d = iphi + max( 1_${ik}$, r-1 ) ib11e = ib11d + max( 1_${ik}$, r ) ib12d = ib11e + max( 1_${ik}$, r - 1_${ik}$ ) ib12e = ib12d + max( 1_${ik}$, r ) ib21d = ib12e + max( 1_${ik}$, r - 1_${ik}$ ) ib21e = ib21d + max( 1_${ik}$, r ) ib22d = ib21e + max( 1_${ik}$, r - 1_${ik}$ ) ib22e = ib22d + max( 1_${ik}$, r ) ibbcsd = ib22e + max( 1_${ik}$, r - 1_${ik}$ ) itaup1 = 2_${ik}$ itaup2 = itaup1 + max( 1_${ik}$, p ) itauq1 = itaup2 + max( 1_${ik}$, m-p ) iorbdb = itauq1 + max( 1_${ik}$, q ) iorgqr = itauq1 + max( 1_${ik}$, q ) iorglq = itauq1 + max( 1_${ik}$, q ) lorgqrmin = 1_${ik}$ lorgqropt = 1_${ik}$ lorglqmin = 1_${ik}$ lorglqopt = 1_${ik}$ if( r == q ) then call stdlib${ii}$_${ci}$unbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work, -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) endif if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$unglq( q-1, q-1, q-1, v1t, ldv1t,cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorglqmin = max( lorglqmin, q-1 ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_${ci}$bbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,dum, u1, ldu1,& u2, ldu2, v1t, ldv1t, cdum, 1_${ik}$,dum, dum, dum, dum, dum, dum, dum, dum,rwork(1_${ik}$), -& 1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == p ) then call stdlib${ii}$_${ci}$unbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, cdum, work(1_${ik}$),-1_${ik}$, childinfo & ) lorgqrmin = max( lorgqrmin, p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( m-p, m-p, q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$unglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_${ci}$bbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,dum, v1t, & ldv1t, cdum, 1_${ik}$, u1, ldu1, u2, ldu2,dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else if( r == m-p ) then call stdlib${ii}$_${ci}$unbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, work(1_${ik}$), -1_${ik}$, childinfo ) lorbdb = int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( p, p, q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2, cdum,work(1_${ik}$), -1_${ik}$, & childinfo ) lorgqrmin = max( lorgqrmin, m-p-1 ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$unglq( q, q, r, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_${ci}$bbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, dum, cdum,& 1_${ik}$, v1t, ldv1t, u2, ldu2, u1,ldu1, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) else call stdlib${ii}$_${ci}$unbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, dum,cdum, cdum, & cdum, cdum, work(1_${ik}$), -1_${ik}$, childinfo) lorbdb = m + int( work(1_${ik}$),KIND=${ik}$) if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( p, p, m-q, u1, ldu1, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$ungqr( m-p, m-p, m-q, u2, ldu2, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorgqrmin = max( lorgqrmin, m-p ) lorgqropt = max( lorgqropt, int( work(1_${ik}$),KIND=${ik}$) ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$unglq( q, q, q, v1t, ldv1t, cdum, work(1_${ik}$), -1_${ik}$,childinfo ) lorglqmin = max( lorglqmin, q ) lorglqopt = max( lorglqopt, int( work(1_${ik}$),KIND=${ik}$) ) end if call stdlib${ii}$_${ci}$bbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, dum, u2, & ldu2, u1, ldu1, cdum, 1_${ik}$, v1t,ldv1t, dum, dum, dum, dum, dum, dum, dum, dum,rwork(& 1_${ik}$), -1_${ik}$, childinfo ) lbbcsd = int( rwork(1_${ik}$),KIND=${ik}$) end if lrworkmin = ibbcsd+lbbcsd-1 lrworkopt = lrworkmin rwork(1_${ik}$) = lrworkopt lworkmin = max( iorbdb+lorbdb-1,iorgqr+lorgqrmin-1,iorglq+lorglqmin-1 ) lworkopt = max( iorbdb+lorbdb-1,iorgqr+lorgqropt-1,iorglq+lorglqopt-1 ) work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -19_${ik}$ end if if( lrwork < lrworkmin .and. .not.lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNCSD2BY1', -info ) return else if( lquery ) then return end if lorgqr = lwork-iorgqr+1 lorglq = lwork-iorglq+1 ! handle four cases separately: r = q, r = p, r = m-p, and r = m-q, ! in which r = min(p,m-p,q,m-q) if( r == q ) then ! case 1: r = q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_${ci}$unbdb1( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_${ci}$ungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_${ci}$ungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then v1t(1_${ik}$,1_${ik}$) = cone do j = 2, q v1t(1_${ik}$,j) = czero v1t(j,1_${ik}$) = czero end do call stdlib${ii}$_${ci}$lacpy( 'U', q-1, q-1, x21(1_${ik}$,2_${ik}$), ldx21, v1t(2_${ik}$,2_${ik}$),ldv1t ) call stdlib${ii}$_${ci}$unglq( q-1, q-1, q-1, v1t(2_${ik}$,2_${ik}$), ldv1t, work(itauq1),work(iorglq), & lorglq, childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_${ci}$bbcsd( jobu1, jobu2, jobv1t, 'N', 'N', m, p, q, theta,rwork(iphi), u1, & ldu1, u2, ldu2, v1t, ldv1t, cdum,1_${ik}$, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd),lrwork-& ibbcsd+1, childinfo ) ! permute rows and columns to place czero submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_${ci}$lapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == p ) then ! case 2: r = p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_${ci}$unbdb2( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then u1(1_${ik}$,1_${ik}$) = cone do j = 2, p u1(1_${ik}$,j) = czero u1(j,1_${ik}$) = czero end do call stdlib${ii}$_${ci}$lacpy( 'L', p-1, p-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$), ldu1 ) call stdlib${ii}$_${ci}$ungqr( p-1, p-1, p-1, u1(2_${ik}$,2_${ik}$), ldu1, work(itaup1),work(iorgqr), & lorgqr, childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', m-p, q, x21, ldx21, u2, ldu2 ) call stdlib${ii}$_${ci}$ungqr( m-p, m-p, q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', p, q, x11, ldx11, v1t, ldv1t ) call stdlib${ii}$_${ci}$unglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_${ci}$bbcsd( jobv1t, 'N', jobu1, jobu2, 'T', m, q, p, theta,rwork(iphi), v1t,& ldv1t, cdum, 1_${ik}$, u1, ldu1, u2,ldu2, rwork(ib11d), rwork(ib11e), rwork(ib12d),rwork(& ib12e), rwork(ib21d), rwork(ib21e),rwork(ib22d), rwork(ib22e), rwork(ibbcsd), & lbbcsd,childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > 0_${ik}$ .and. wantu2 ) then do i = 1, q iwork(i) = m - p - q + i end do do i = q + 1, m - p iwork(i) = i - q end do call stdlib${ii}$_${ci}$lapmt( .false., m-p, m-p, u2, ldu2, iwork ) end if else if( r == m-p ) then ! case 3: r = m-p ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_${ci}$unbdb3( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), lorbdb, childinfo ) ! accumulate householder reflectors if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'L', p, q, x11, ldx11, u1, ldu1 ) call stdlib${ii}$_${ci}$ungqr( p, p, q, u1, ldu1, work(itaup1), work(iorgqr),lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then u2(1_${ik}$,1_${ik}$) = cone do j = 2, m-p u2(1_${ik}$,j) = czero u2(j,1_${ik}$) = czero end do call stdlib${ii}$_${ci}$lacpy( 'L', m-p-1, m-p-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_${ci}$ungqr( m-p-1, m-p-1, m-p-1, u2(2_${ik}$,2_${ik}$), ldu2,work(itaup2), work(iorgqr)& , lorgqr, childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', m-p, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_${ci}$unglq( q, q, r, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_${ci}$bbcsd( 'N', jobv1t, jobu2, jobu1, 'T', m, m-q, m-p,theta, rwork(iphi), & cdum, 1_${ik}$, v1t, ldv1t, u2, ldu2,u1, ldu1, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( q > r ) then do i = 1, r iwork(i) = q - r + i end do do i = r + 1, q iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_${ci}$lapmt( .false., p, q, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_${ci}$lapmr( .false., q, q, v1t, ldv1t, iwork ) end if end if else ! case 4: r = m-q ! simultaneously bidiagonalize x11 and x21 call stdlib${ii}$_${ci}$unbdb4( m, p, q, x11, ldx11, x21, ldx21, theta,rwork(iphi), work(& itaup1), work(itaup2),work(itauq1), work(iorbdb), work(iorbdb+m),lorbdb-m, & childinfo ) ! accumulate householder reflectors if( wantu2 .and. m-p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$copy( m-p, work(iorbdb+p), 1_${ik}$, u2, 1_${ik}$ ) end if if( wantu1 .and. p > 0_${ik}$ ) then call stdlib${ii}$_${ci}$copy( p, work(iorbdb), 1_${ik}$, u1, 1_${ik}$ ) do j = 2, p u1(1_${ik}$,j) = czero end do call stdlib${ii}$_${ci}$lacpy( 'L', p-1, m-q-1, x11(2_${ik}$,1_${ik}$), ldx11, u1(2_${ik}$,2_${ik}$),ldu1 ) call stdlib${ii}$_${ci}$ungqr( p, p, m-q, u1, ldu1, work(itaup1),work(iorgqr), lorgqr, & childinfo ) end if if( wantu2 .and. m-p > 0_${ik}$ ) then do j = 2, m-p u2(1_${ik}$,j) = czero end do call stdlib${ii}$_${ci}$lacpy( 'L', m-p-1, m-q-1, x21(2_${ik}$,1_${ik}$), ldx21, u2(2_${ik}$,2_${ik}$),ldu2 ) call stdlib${ii}$_${ci}$ungqr( m-p, m-p, m-q, u2, ldu2, work(itaup2),work(iorgqr), lorgqr, & childinfo ) end if if( wantv1t .and. q > 0_${ik}$ ) then call stdlib${ii}$_${ci}$lacpy( 'U', m-q, q, x21, ldx21, v1t, ldv1t ) call stdlib${ii}$_${ci}$lacpy( 'U', p-(m-q), q-(m-q), x11(m-q+1,m-q+1), ldx11,v1t(m-q+1,m-q+& 1_${ik}$), ldv1t ) call stdlib${ii}$_${ci}$lacpy( 'U', -p+q, q-p, x21(m-q+1,p+1), ldx21,v1t(p+1,p+1), ldv1t ) call stdlib${ii}$_${ci}$unglq( q, q, q, v1t, ldv1t, work(itauq1),work(iorglq), lorglq, & childinfo ) end if ! simultaneously diagonalize x11 and x21. call stdlib${ii}$_${ci}$bbcsd( jobu2, jobu1, 'N', jobv1t, 'N', m, m-p, m-q,theta, rwork(iphi), & u2, ldu2, u1, ldu1, cdum, 1_${ik}$,v1t, ldv1t, rwork(ib11d), rwork(ib11e),rwork(ib12d), & rwork(ib12e), rwork(ib21d),rwork(ib21e), rwork(ib22d), rwork(ib22e),rwork(ibbcsd), & lbbcsd, childinfo ) ! permute rows and columns to place identity submatrices in ! preferred positions if( p > r ) then do i = 1, r iwork(i) = p - r + i end do do i = r + 1, p iwork(i) = i - r end do if( wantu1 ) then call stdlib${ii}$_${ci}$lapmt( .false., p, p, u1, ldu1, iwork ) end if if( wantv1t ) then call stdlib${ii}$_${ci}$lapmr( .false., p, q, v1t, ldv1t, iwork ) end if end if end if return end subroutine stdlib${ii}$_${ci}$uncsd2by1 #:endif #:endfor module subroutine stdlib${ii}$_cunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & !! CUNBDB simultaneously bidiagonalizes the blocks of an M-by-M !! partitioned unitary matrix X: !! [ B11 | B12 0 0 ] !! [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H !! X = [-----------] = [---------] [----------------] [---------] . !! [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] !! [ 0 | 0 0 I ] !! X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is !! not the case, then X must be transposed and/or permuted. This can be !! done in constant time using the TRANS and SIGNS options. See CUNCSD !! for details.) !! The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- !! (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are !! represented implicitly by Householder vectors. !! B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented !! implicitly by angles THETA, PHI. ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q ! Array Arguments real(sp), intent(out) :: phi(*), theta(*) complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! ==================================================================== ! Parameters ! Local Scalars logical(lk) :: colmajor, lquery integer(${ik}$) :: i, lworkmin, lworkopt real(sp) :: z1, z2, z3, z4 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ colmajor = .not. stdlib_lsame( trans, 'T' ) if( .not. stdlib_lsame( signs, 'O' ) ) then z1 = one z2 = one z3 = one z4 = one else z1 = one z2 = -one z3 = one z4 = -one end if lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -3_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -4_${ik}$ else if( q < 0_${ik}$ .or. q > p .or. q > m-p .or.q > m-q ) then info = -5_${ik}$ else if( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -7_${ik}$ else if( .not.colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -7_${ik}$ else if( colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -9_${ik}$ else if( .not.colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -9_${ik}$ else if( colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -11_${ik}$ else if( .not.colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if( colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -13_${ik}$ else if( .not.colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then lworkopt = m - q lworkmin = m - q work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'XORBDB', -info ) return else if( lquery ) then return end if ! handle column-major and row-major separately if( colmajor ) then ! reduce columns 1, ..., q of x11, x12, x21, and x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_cscal( p-i+1, cmplx( z1, 0.0_sp,KIND=sp), x11(i,i), 1_${ik}$ ) else call stdlib${ii}$_cscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_sp,KIND=sp),x11(i,i), & 1_${ik}$ ) call stdlib${ii}$_caxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), x12(& i,i-1), 1_${ik}$, x11(i,i), 1_${ik}$ ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_cscal( m-p-i+1, cmplx( z2, 0.0_sp,KIND=sp), x21(i,i), 1_${ik}$ ) else call stdlib${ii}$_cscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_sp,KIND=sp),x21(i,i),& 1_${ik}$ ) call stdlib${ii}$_caxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), & x22(i,i-1), 1_${ik}$, x21(i,i), 1_${ik}$ ) end if theta(i) = atan2( stdlib${ii}$_scnrm2( m-p-i+1, x21(i,i), 1_${ik}$ ),stdlib${ii}$_scnrm2( p-i+1, & x11(i,i), 1_${ik}$ ) ) if( p > i ) then call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) else if ( p == i ) then call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i,i), 1_${ik}$, taup1(i) ) end if x11(i,i) = cone if ( m-p > i ) then call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$,taup2(i) ) else if ( m-p == i ) then call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i,i), 1_${ik}$,taup2(i) ) end if x21(i,i) = cone if ( q > i ) then call stdlib${ii}$_clarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$,conjg(taup1(i)), x11(i,i+1), & ldx11, work ) call stdlib${ii}$_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$,conjg(taup2(i)), x21(i,i+1),& ldx21, work ) end if if ( m-q+1 > i ) then call stdlib${ii}$_clarf( 'L', p-i+1, m-q-i+1, x11(i,i), 1_${ik}$,conjg(taup1(i)), x12(i,i),& ldx12, work ) call stdlib${ii}$_clarf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1_${ik}$,conjg(taup2(i)), x22(i,& i), ldx22, work ) end if if( i < q ) then call stdlib${ii}$_cscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_sp,KIND=sp),x11(i,i+& 1_${ik}$), ldx11 ) call stdlib${ii}$_caxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_sp,KIND=sp),x21(i,i+1)& , ldx21, x11(i,i+1), ldx11 ) end if call stdlib${ii}$_cscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_sp,KIND=sp),x12(i,i)& , ldx12 ) call stdlib${ii}$_caxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_sp,KIND=sp),x22(i,i),& ldx22, x12(i,i), ldx12 ) if( i < q )phi(i) = atan2( stdlib${ii}$_scnrm2( q-i, x11(i,i+1), ldx11 ),stdlib${ii}$_scnrm2(& m-q-i+1, x12(i,i), ldx12 ) ) if( i < q ) then call stdlib${ii}$_clacgv( q-i, x11(i,i+1), ldx11 ) if ( i == q-1 ) then call stdlib${ii}$_clarfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) else call stdlib${ii}$_clarfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) end if x11(i,i+1) = cone end if if ( m-q+1 > i ) then call stdlib${ii}$_clacgv( m-q-i+1, x12(i,i), ldx12 ) if ( m-q == i ) then call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if end if x12(i,i) = cone if( i < q ) then call stdlib${ii}$_clarf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & ldx11, work ) call stdlib${ii}$_clarf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & ldx21, work ) end if if ( p > i ) then call stdlib${ii}$_clarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if ( m-p > i ) then call stdlib${ii}$_clarf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & ldx22, work ) end if if( i < q )call stdlib${ii}$_clacgv( q-i, x11(i,i+1), ldx11 ) call stdlib${ii}$_clacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_cscal( m-q-i+1, cmplx( -z1*z4, 0.0_sp,KIND=sp), x12(i,i),ldx12 ) call stdlib${ii}$_clacgv( m-q-i+1, x12(i,i), ldx12 ) if ( i >= m-q ) then call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_clarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_clarf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& x22(q+1,i), ldx22, work ) call stdlib${ii}$_clacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_cscal( m-p-q-i+1, cmplx( z2*z4, 0.0_sp,KIND=sp),x22(q+i,p+i), ldx22 ) call stdlib${ii}$_clacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) call stdlib${ii}$_clarfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) x22(q+i,p+i) = cone call stdlib${ii}$_clarf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& q+i+1,p+i), ldx22, work ) call stdlib${ii}$_clacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) end do else ! reduce columns 1, ..., q of x11, x12, x21, x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_cscal( p-i+1, cmplx( z1, 0.0_sp,KIND=sp), x11(i,i),ldx11 ) else call stdlib${ii}$_cscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_sp,KIND=sp),x11(i,i), & ldx11 ) call stdlib${ii}$_caxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), x12(& i-1,i), ldx12, x11(i,i), ldx11 ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_cscal( m-p-i+1, cmplx( z2, 0.0_sp,KIND=sp), x21(i,i),ldx21 ) else call stdlib${ii}$_cscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_sp,KIND=sp),x21(i,i),& ldx21 ) call stdlib${ii}$_caxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_sp,KIND=sp), & x22(i-1,i), ldx22, x21(i,i), ldx21 ) end if theta(i) = atan2( stdlib${ii}$_scnrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib${ii}$_scnrm2( p-i+1,& x11(i,i), ldx11 ) ) call stdlib${ii}$_clacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_clacgv( m-p-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) x11(i,i) = cone if ( i == m-p ) then call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) else call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) end if x21(i,i) = cone call stdlib${ii}$_clarf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & work ) call stdlib${ii}$_clarf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & ldx12, work ) call stdlib${ii}$_clarf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & ldx21, work ) call stdlib${ii}$_clarf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & ldx22, work ) call stdlib${ii}$_clacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_clacgv( m-p-i+1, x21(i,i), ldx21 ) if( i < q ) then call stdlib${ii}$_cscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_sp,KIND=sp),x11(i+1,& i), 1_${ik}$ ) call stdlib${ii}$_caxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_sp,KIND=sp),x21(i+1,i)& , 1_${ik}$, x11(i+1,i), 1_${ik}$ ) end if call stdlib${ii}$_cscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_sp,KIND=sp),x12(i,i)& , 1_${ik}$ ) call stdlib${ii}$_caxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_sp,KIND=sp),x22(i,i),& 1_${ik}$, x12(i,i), 1_${ik}$ ) if( i < q )phi(i) = atan2( stdlib${ii}$_scnrm2( q-i, x11(i+1,i), 1_${ik}$ ),stdlib${ii}$_scnrm2( m-& q-i+1, x12(i,i), 1_${ik}$ ) ) if( i < q ) then call stdlib${ii}$_clarfgp( q-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, tauq1(i) ) x11(i+1,i) = cone end if call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if( i < q ) then call stdlib${ii}$_clarf( 'L', q-i, p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x11(i+1,i+1),& ldx11, work ) call stdlib${ii}$_clarf( 'L', q-i, m-p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x21(i+1,i+& 1_${ik}$), ldx21, work ) end if call stdlib${ii}$_clarf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$, conjg(tauq2(i)),x12(i,i+1), & ldx12, work ) if ( m-p > i ) then call stdlib${ii}$_clarf( 'L', m-q-i+1, m-p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x22(i,i+& 1_${ik}$), ldx22, work ) end if end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_cscal( m-q-i+1, cmplx( -z1*z4, 0.0_sp,KIND=sp), x12(i,i), 1_${ik}$ ) call stdlib${ii}$_clarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_clarf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x12(i,i+1),& ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_clarf( 'L', m-q-i+1, m-p-q, x12(i,i), 1_${ik}$,conjg(tauq2(& i)), x22(i,q+1), ldx22, work ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_cscal( m-p-q-i+1, cmplx( z2*z4, 0.0_sp,KIND=sp),x22(p+i,q+i), 1_${ik}$ ) call stdlib${ii}$_clarfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1_${ik}$,tauq2(p+i) ) x22(p+i,q+i) = cone if ( m-p-q /= i ) then call stdlib${ii}$_clarf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1_${ik}$,conjg(tauq2(p+i)),& x22(p+i,q+i+1), ldx22,work ) end if end do end if return end subroutine stdlib${ii}$_cunbdb module subroutine stdlib${ii}$_zunbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & !! ZUNBDB simultaneously bidiagonalizes the blocks of an M-by-M !! partitioned unitary matrix X: !! [ B11 | B12 0 0 ] !! [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H !! X = [-----------] = [---------] [----------------] [---------] . !! [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] !! [ 0 | 0 0 I ] !! X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is !! not the case, then X must be transposed and/or permuted. This can be !! done in constant time using the TRANS and SIGNS options. See ZUNCSD !! for details.) !! The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- !! (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are !! represented implicitly by Householder vectors. !! B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented !! implicitly by angles THETA, PHI. ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q ! Array Arguments real(dp), intent(out) :: phi(*), theta(*) complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! ==================================================================== ! Parameters ! Local Scalars logical(lk) :: colmajor, lquery integer(${ik}$) :: i, lworkmin, lworkopt real(dp) :: z1, z2, z3, z4 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ colmajor = .not. stdlib_lsame( trans, 'T' ) if( .not. stdlib_lsame( signs, 'O' ) ) then z1 = one z2 = one z3 = one z4 = one else z1 = one z2 = -one z3 = one z4 = -one end if lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -3_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -4_${ik}$ else if( q < 0_${ik}$ .or. q > p .or. q > m-p .or.q > m-q ) then info = -5_${ik}$ else if( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -7_${ik}$ else if( .not.colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -7_${ik}$ else if( colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -9_${ik}$ else if( .not.colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -9_${ik}$ else if( colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -11_${ik}$ else if( .not.colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if( colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -13_${ik}$ else if( .not.colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then lworkopt = m - q lworkmin = m - q work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'XORBDB', -info ) return else if( lquery ) then return end if ! handle column-major and row-major separately if( colmajor ) then ! reduce columns 1, ..., q of x11, x12, x21, and x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_zscal( p-i+1, cmplx( z1, 0.0_dp,KIND=dp), x11(i,i), 1_${ik}$ ) else call stdlib${ii}$_zscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_dp,KIND=dp),x11(i,i), & 1_${ik}$ ) call stdlib${ii}$_zaxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), x12(& i,i-1), 1_${ik}$, x11(i,i), 1_${ik}$ ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_zscal( m-p-i+1, cmplx( z2, 0.0_dp,KIND=dp), x21(i,i), 1_${ik}$ ) else call stdlib${ii}$_zscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_dp,KIND=dp),x21(i,i),& 1_${ik}$ ) call stdlib${ii}$_zaxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), & x22(i,i-1), 1_${ik}$, x21(i,i), 1_${ik}$ ) end if theta(i) = atan2( stdlib${ii}$_dznrm2( m-p-i+1, x21(i,i), 1_${ik}$ ),stdlib${ii}$_dznrm2( p-i+1, & x11(i,i), 1_${ik}$ ) ) if( p > i ) then call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) else if ( p == i ) then call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i,i), 1_${ik}$, taup1(i) ) end if x11(i,i) = cone if ( m-p > i ) then call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$,taup2(i) ) else if ( m-p == i ) then call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i,i), 1_${ik}$,taup2(i) ) end if x21(i,i) = cone if ( q > i ) then call stdlib${ii}$_zlarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$,conjg(taup1(i)), x11(i,i+1), & ldx11, work ) call stdlib${ii}$_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$,conjg(taup2(i)), x21(i,i+1),& ldx21, work ) end if if ( m-q+1 > i ) then call stdlib${ii}$_zlarf( 'L', p-i+1, m-q-i+1, x11(i,i), 1_${ik}$,conjg(taup1(i)), x12(i,i),& ldx12, work ) call stdlib${ii}$_zlarf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1_${ik}$,conjg(taup2(i)), x22(i,& i), ldx22, work ) end if if( i < q ) then call stdlib${ii}$_zscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_dp,KIND=dp),x11(i,i+& 1_${ik}$), ldx11 ) call stdlib${ii}$_zaxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_dp,KIND=dp),x21(i,i+1)& , ldx21, x11(i,i+1), ldx11 ) end if call stdlib${ii}$_zscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_dp,KIND=dp),x12(i,i)& , ldx12 ) call stdlib${ii}$_zaxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_dp,KIND=dp),x22(i,i),& ldx22, x12(i,i), ldx12 ) if( i < q )phi(i) = atan2( stdlib${ii}$_dznrm2( q-i, x11(i,i+1), ldx11 ),stdlib${ii}$_dznrm2(& m-q-i+1, x12(i,i), ldx12 ) ) if( i < q ) then call stdlib${ii}$_zlacgv( q-i, x11(i,i+1), ldx11 ) if ( i == q-1 ) then call stdlib${ii}$_zlarfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) else call stdlib${ii}$_zlarfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) end if x11(i,i+1) = cone end if if ( m-q+1 > i ) then call stdlib${ii}$_zlacgv( m-q-i+1, x12(i,i), ldx12 ) if ( m-q == i ) then call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if end if x12(i,i) = cone if( i < q ) then call stdlib${ii}$_zlarf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & ldx11, work ) call stdlib${ii}$_zlarf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & ldx21, work ) end if if ( p > i ) then call stdlib${ii}$_zlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if ( m-p > i ) then call stdlib${ii}$_zlarf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & ldx22, work ) end if if( i < q )call stdlib${ii}$_zlacgv( q-i, x11(i,i+1), ldx11 ) call stdlib${ii}$_zlacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_zscal( m-q-i+1, cmplx( -z1*z4, 0.0_dp,KIND=dp), x12(i,i),ldx12 ) call stdlib${ii}$_zlacgv( m-q-i+1, x12(i,i), ldx12 ) if ( i >= m-q ) then call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_zlarf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_zlarf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& x22(q+1,i), ldx22, work ) call stdlib${ii}$_zlacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_zscal( m-p-q-i+1, cmplx( z2*z4, 0.0_dp,KIND=dp),x22(q+i,p+i), ldx22 ) call stdlib${ii}$_zlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) call stdlib${ii}$_zlarfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) x22(q+i,p+i) = cone call stdlib${ii}$_zlarf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& q+i+1,p+i), ldx22, work ) call stdlib${ii}$_zlacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) end do else ! reduce columns 1, ..., q of x11, x12, x21, x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_zscal( p-i+1, cmplx( z1, 0.0_dp,KIND=dp), x11(i,i),ldx11 ) else call stdlib${ii}$_zscal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_dp,KIND=dp),x11(i,i), & ldx11 ) call stdlib${ii}$_zaxpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), x12(& i-1,i), ldx12, x11(i,i), ldx11 ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_zscal( m-p-i+1, cmplx( z2, 0.0_dp,KIND=dp), x21(i,i),ldx21 ) else call stdlib${ii}$_zscal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_dp,KIND=dp),x21(i,i),& ldx21 ) call stdlib${ii}$_zaxpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_dp,KIND=dp), & x22(i-1,i), ldx22, x21(i,i), ldx21 ) end if theta(i) = atan2( stdlib${ii}$_dznrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib${ii}$_dznrm2( p-i+1,& x11(i,i), ldx11 ) ) call stdlib${ii}$_zlacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_zlacgv( m-p-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) x11(i,i) = cone if ( i == m-p ) then call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) else call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) end if x21(i,i) = cone call stdlib${ii}$_zlarf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & work ) call stdlib${ii}$_zlarf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & ldx12, work ) call stdlib${ii}$_zlarf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & ldx21, work ) call stdlib${ii}$_zlarf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & ldx22, work ) call stdlib${ii}$_zlacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_zlacgv( m-p-i+1, x21(i,i), ldx21 ) if( i < q ) then call stdlib${ii}$_zscal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_dp,KIND=dp),x11(i+1,& i), 1_${ik}$ ) call stdlib${ii}$_zaxpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_dp,KIND=dp),x21(i+1,i)& , 1_${ik}$, x11(i+1,i), 1_${ik}$ ) end if call stdlib${ii}$_zscal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_dp,KIND=dp),x12(i,i)& , 1_${ik}$ ) call stdlib${ii}$_zaxpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_dp,KIND=dp),x22(i,i),& 1_${ik}$, x12(i,i), 1_${ik}$ ) if( i < q )phi(i) = atan2( stdlib${ii}$_dznrm2( q-i, x11(i+1,i), 1_${ik}$ ),stdlib${ii}$_dznrm2( m-& q-i+1, x12(i,i), 1_${ik}$ ) ) if( i < q ) then call stdlib${ii}$_zlarfgp( q-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, tauq1(i) ) x11(i+1,i) = cone end if call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if( i < q ) then call stdlib${ii}$_zlarf( 'L', q-i, p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x11(i+1,i+1),& ldx11, work ) call stdlib${ii}$_zlarf( 'L', q-i, m-p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x21(i+1,i+& 1_${ik}$), ldx21, work ) end if call stdlib${ii}$_zlarf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x12(i,i+1), & ldx12, work ) if ( m-p > i ) then call stdlib${ii}$_zlarf( 'L', m-q-i+1, m-p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x22(i,i+& 1_${ik}$), ldx22, work ) end if end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_zscal( m-q-i+1, cmplx( -z1*z4, 0.0_dp,KIND=dp), x12(i,i), 1_${ik}$ ) call stdlib${ii}$_zlarfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_zlarf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x12(i,i+1),& ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_zlarf( 'L', m-q-i+1, m-p-q, x12(i,i), 1_${ik}$,conjg(tauq2(& i)), x22(i,q+1), ldx22, work ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_zscal( m-p-q-i+1, cmplx( z2*z4, 0.0_dp,KIND=dp),x22(p+i,q+i), 1_${ik}$ ) call stdlib${ii}$_zlarfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1_${ik}$,tauq2(p+i) ) x22(p+i,q+i) = cone if ( m-p-q /= i ) then call stdlib${ii}$_zlarf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1_${ik}$,conjg(tauq2(p+i)),& x22(p+i,q+i+1), ldx22,work ) end if end do end if return end subroutine stdlib${ii}$_zunbdb #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$unbdb( trans, signs, m, p, q, x11, ldx11, x12, ldx12,x21, ldx21, x22, & !! ZUNBDB: simultaneously bidiagonalizes the blocks of an M-by-M !! partitioned unitary matrix X: !! [ B11 | B12 0 0 ] !! [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H !! X = [-----------] = [---------] [----------------] [---------] . !! [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] !! [ 0 | 0 0 I ] !! X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is !! not the case, then X must be transposed and/or permuted. This can be !! done in constant time using the TRANS and SIGNS options. See ZUNCSD !! for details.) !! The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- !! (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are !! represented implicitly by Householder vectors. !! B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented !! implicitly by angles THETA, PHI. ldx22, theta, phi, taup1,taup2, tauq1, tauq2, work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments character, intent(in) :: signs, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldx11, ldx12, ldx21, ldx22, lwork, m, p, q ! Array Arguments real(${ck}$), intent(out) :: phi(*), theta(*) complex(${ck}$), intent(out) :: taup1(*), taup2(*), tauq1(*), tauq2(*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x12(ldx12,*), x21(ldx21,*), x22(ldx22,*) ! ==================================================================== ! Parameters ! Local Scalars logical(lk) :: colmajor, lquery integer(${ik}$) :: i, lworkmin, lworkopt real(${ck}$) :: z1, z2, z3, z4 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ colmajor = .not. stdlib_lsame( trans, 'T' ) if( .not. stdlib_lsame( signs, 'O' ) ) then z1 = one z2 = one z3 = one z4 = one else z1 = one z2 = -one z3 = one z4 = -one end if lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -3_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -4_${ik}$ else if( q < 0_${ik}$ .or. q > p .or. q > m-p .or.q > m-q ) then info = -5_${ik}$ else if( colmajor .and. ldx11 < max( 1_${ik}$, p ) ) then info = -7_${ik}$ else if( .not.colmajor .and. ldx11 < max( 1_${ik}$, q ) ) then info = -7_${ik}$ else if( colmajor .and. ldx12 < max( 1_${ik}$, p ) ) then info = -9_${ik}$ else if( .not.colmajor .and. ldx12 < max( 1_${ik}$, m-q ) ) then info = -9_${ik}$ else if( colmajor .and. ldx21 < max( 1_${ik}$, m-p ) ) then info = -11_${ik}$ else if( .not.colmajor .and. ldx21 < max( 1_${ik}$, q ) ) then info = -11_${ik}$ else if( colmajor .and. ldx22 < max( 1_${ik}$, m-p ) ) then info = -13_${ik}$ else if( .not.colmajor .and. ldx22 < max( 1_${ik}$, m-q ) ) then info = -13_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then lworkopt = m - q lworkmin = m - q work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -21_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'XORBDB', -info ) return else if( lquery ) then return end if ! handle column-major and row-major separately if( colmajor ) then ! reduce columns 1, ..., q of x11, x12, x21, and x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_${ci}$scal( p-i+1, cmplx( z1, 0.0_${ck}$,KIND=${ck}$), x11(i,i), 1_${ik}$ ) else call stdlib${ii}$_${ci}$scal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_${ck}$,KIND=${ck}$),x11(i,i), & 1_${ik}$ ) call stdlib${ii}$_${ci}$axpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_${ck}$,KIND=${ck}$), x12(& i,i-1), 1_${ik}$, x11(i,i), 1_${ik}$ ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_${ci}$scal( m-p-i+1, cmplx( z2, 0.0_${ck}$,KIND=${ck}$), x21(i,i), 1_${ik}$ ) else call stdlib${ii}$_${ci}$scal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_${ck}$,KIND=${ck}$),x21(i,i),& 1_${ik}$ ) call stdlib${ii}$_${ci}$axpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_${ck}$,KIND=${ck}$), & x22(i,i-1), 1_${ik}$, x21(i,i), 1_${ik}$ ) end if theta(i) = atan2( stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i+1, x21(i,i), 1_${ik}$ ),stdlib${ii}$_${c2ri(ci)}$znrm2( p-i+1, & x11(i,i), 1_${ik}$ ) ) if( p > i ) then call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) else if ( p == i ) then call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i,i), 1_${ik}$, taup1(i) ) end if x11(i,i) = cone if ( m-p > i ) then call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$,taup2(i) ) else if ( m-p == i ) then call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i,i), 1_${ik}$,taup2(i) ) end if x21(i,i) = cone if ( q > i ) then call stdlib${ii}$_${ci}$larf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$,conjg(taup1(i)), x11(i,i+1), & ldx11, work ) call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$,conjg(taup2(i)), x21(i,i+1),& ldx21, work ) end if if ( m-q+1 > i ) then call stdlib${ii}$_${ci}$larf( 'L', p-i+1, m-q-i+1, x11(i,i), 1_${ik}$,conjg(taup1(i)), x12(i,i),& ldx12, work ) call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, m-q-i+1, x21(i,i), 1_${ik}$,conjg(taup2(i)), x22(i,& i), ldx22, work ) end if if( i < q ) then call stdlib${ii}$_${ci}$scal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_${ck}$,KIND=${ck}$),x11(i,i+& 1_${ik}$), ldx11 ) call stdlib${ii}$_${ci}$axpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_${ck}$,KIND=${ck}$),x21(i,i+1)& , ldx21, x11(i,i+1), ldx11 ) end if call stdlib${ii}$_${ci}$scal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_${ck}$,KIND=${ck}$),x12(i,i)& , ldx12 ) call stdlib${ii}$_${ci}$axpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_${ck}$,KIND=${ck}$),x22(i,i),& ldx22, x12(i,i), ldx12 ) if( i < q )phi(i) = atan2( stdlib${ii}$_${c2ri(ci)}$znrm2( q-i, x11(i,i+1), ldx11 ),stdlib${ii}$_${c2ri(ci)}$znrm2(& m-q-i+1, x12(i,i), ldx12 ) ) if( i < q ) then call stdlib${ii}$_${ci}$lacgv( q-i, x11(i,i+1), ldx11 ) if ( i == q-1 ) then call stdlib${ii}$_${ci}$larfgp( q-i, x11(i,i+1), x11(i,i+1), ldx11,tauq1(i) ) else call stdlib${ii}$_${ci}$larfgp( q-i, x11(i,i+1), x11(i,i+2), ldx11,tauq1(i) ) end if x11(i,i+1) = cone end if if ( m-q+1 > i ) then call stdlib${ii}$_${ci}$lacgv( m-q-i+1, x12(i,i), ldx12 ) if ( m-q == i ) then call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if end if x12(i,i) = cone if( i < q ) then call stdlib${ii}$_${ci}$larf( 'R', p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x11(i+1,i+1), & ldx11, work ) call stdlib${ii}$_${ci}$larf( 'R', m-p-i, q-i, x11(i,i+1), ldx11, tauq1(i),x21(i+1,i+1), & ldx21, work ) end if if ( p > i ) then call stdlib${ii}$_${ci}$larf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if ( m-p > i ) then call stdlib${ii}$_${ci}$larf( 'R', m-p-i, m-q-i+1, x12(i,i), ldx12,tauq2(i), x22(i+1,i), & ldx22, work ) end if if( i < q )call stdlib${ii}$_${ci}$lacgv( q-i, x11(i,i+1), ldx11 ) call stdlib${ii}$_${ci}$lacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_${ci}$scal( m-q-i+1, cmplx( -z1*z4, 0.0_${ck}$,KIND=${ck}$), x12(i,i),ldx12 ) call stdlib${ii}$_${ci}$lacgv( m-q-i+1, x12(i,i), ldx12 ) if ( i >= m-q ) then call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i,i), ldx12,tauq2(i) ) else call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i,i+1), ldx12,tauq2(i) ) end if x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_${ci}$larf( 'R', p-i, m-q-i+1, x12(i,i), ldx12, tauq2(i),x12(i+1,i), & ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_${ci}$larf( 'R', m-p-q, m-q-i+1, x12(i,i), ldx12,tauq2(i),& x22(q+1,i), ldx22, work ) call stdlib${ii}$_${ci}$lacgv( m-q-i+1, x12(i,i), ldx12 ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_${ci}$scal( m-p-q-i+1, cmplx( z2*z4, 0.0_${ck}$,KIND=${ck}$),x22(q+i,p+i), ldx22 ) call stdlib${ii}$_${ci}$lacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) call stdlib${ii}$_${ci}$larfgp( m-p-q-i+1, x22(q+i,p+i), x22(q+i,p+i+1),ldx22, tauq2(p+i) ) x22(q+i,p+i) = cone call stdlib${ii}$_${ci}$larf( 'R', m-p-q-i, m-p-q-i+1, x22(q+i,p+i), ldx22,tauq2(p+i), x22(& q+i+1,p+i), ldx22, work ) call stdlib${ii}$_${ci}$lacgv( m-p-q-i+1, x22(q+i,p+i), ldx22 ) end do else ! reduce columns 1, ..., q of x11, x12, x21, x22 do i = 1, q if( i == 1_${ik}$ ) then call stdlib${ii}$_${ci}$scal( p-i+1, cmplx( z1, 0.0_${ck}$,KIND=${ck}$), x11(i,i),ldx11 ) else call stdlib${ii}$_${ci}$scal( p-i+1, cmplx( z1*cos(phi(i-1)), 0.0_${ck}$,KIND=${ck}$),x11(i,i), & ldx11 ) call stdlib${ii}$_${ci}$axpy( p-i+1, cmplx( -z1*z3*z4*sin(phi(i-1)),0.0_${ck}$,KIND=${ck}$), x12(& i-1,i), ldx12, x11(i,i), ldx11 ) end if if( i == 1_${ik}$ ) then call stdlib${ii}$_${ci}$scal( m-p-i+1, cmplx( z2, 0.0_${ck}$,KIND=${ck}$), x21(i,i),ldx21 ) else call stdlib${ii}$_${ci}$scal( m-p-i+1, cmplx( z2*cos(phi(i-1)), 0.0_${ck}$,KIND=${ck}$),x21(i,i),& ldx21 ) call stdlib${ii}$_${ci}$axpy( m-p-i+1, cmplx( -z2*z3*z4*sin(phi(i-1)),0.0_${ck}$,KIND=${ck}$), & x22(i-1,i), ldx22, x21(i,i), ldx21 ) end if theta(i) = atan2( stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i+1, x21(i,i), ldx21 ),stdlib${ii}$_${c2ri(ci)}$znrm2( p-i+1,& x11(i,i), ldx11 ) ) call stdlib${ii}$_${ci}$lacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_${ci}$lacgv( m-p-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i,i+1), ldx11, taup1(i) ) x11(i,i) = cone if ( i == m-p ) then call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i,i), ldx21,taup2(i) ) else call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i,i+1), ldx21,taup2(i) ) end if x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'R', q-i, p-i+1, x11(i,i), ldx11, taup1(i),x11(i+1,i), ldx11, & work ) call stdlib${ii}$_${ci}$larf( 'R', m-q-i+1, p-i+1, x11(i,i), ldx11, taup1(i),x12(i,i), & ldx12, work ) call stdlib${ii}$_${ci}$larf( 'R', q-i, m-p-i+1, x21(i,i), ldx21, taup2(i),x21(i+1,i), & ldx21, work ) call stdlib${ii}$_${ci}$larf( 'R', m-q-i+1, m-p-i+1, x21(i,i), ldx21,taup2(i), x22(i,i), & ldx22, work ) call stdlib${ii}$_${ci}$lacgv( p-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_${ci}$lacgv( m-p-i+1, x21(i,i), ldx21 ) if( i < q ) then call stdlib${ii}$_${ci}$scal( q-i, cmplx( -z1*z3*sin(theta(i)), 0.0_${ck}$,KIND=${ck}$),x11(i+1,& i), 1_${ik}$ ) call stdlib${ii}$_${ci}$axpy( q-i, cmplx( z2*z3*cos(theta(i)), 0.0_${ck}$,KIND=${ck}$),x21(i+1,i)& , 1_${ik}$, x11(i+1,i), 1_${ik}$ ) end if call stdlib${ii}$_${ci}$scal( m-q-i+1, cmplx( -z1*z4*sin(theta(i)), 0.0_${ck}$,KIND=${ck}$),x12(i,i)& , 1_${ik}$ ) call stdlib${ii}$_${ci}$axpy( m-q-i+1, cmplx( z2*z4*cos(theta(i)), 0.0_${ck}$,KIND=${ck}$),x22(i,i),& 1_${ik}$, x12(i,i), 1_${ik}$ ) if( i < q )phi(i) = atan2( stdlib${ii}$_${c2ri(ci)}$znrm2( q-i, x11(i+1,i), 1_${ik}$ ),stdlib${ii}$_${c2ri(ci)}$znrm2( m-& q-i+1, x12(i,i), 1_${ik}$ ) ) if( i < q ) then call stdlib${ii}$_${ci}$larfgp( q-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, tauq1(i) ) x11(i+1,i) = cone end if call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if( i < q ) then call stdlib${ii}$_${ci}$larf( 'L', q-i, p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x11(i+1,i+1),& ldx11, work ) call stdlib${ii}$_${ci}$larf( 'L', q-i, m-p-i, x11(i+1,i), 1_${ik}$,conjg(tauq1(i)), x21(i+1,i+& 1_${ik}$), ldx21, work ) end if call stdlib${ii}$_${ci}$larf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x12(i,i+1), & ldx12, work ) if ( m-p > i ) then call stdlib${ii}$_${ci}$larf( 'L', m-q-i+1, m-p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x22(i,i+& 1_${ik}$), ldx22, work ) end if end do ! reduce columns q + 1, ..., p of x12, x22 do i = q + 1, p call stdlib${ii}$_${ci}$scal( m-q-i+1, cmplx( -z1*z4, 0.0_${ck}$,KIND=${ck}$), x12(i,i), 1_${ik}$ ) call stdlib${ii}$_${ci}$larfgp( m-q-i+1, x12(i,i), x12(i+1,i), 1_${ik}$, tauq2(i) ) x12(i,i) = cone if ( p > i ) then call stdlib${ii}$_${ci}$larf( 'L', m-q-i+1, p-i, x12(i,i), 1_${ik}$,conjg(tauq2(i)), x12(i,i+1),& ldx12, work ) end if if( m-p-q >= 1_${ik}$ )call stdlib${ii}$_${ci}$larf( 'L', m-q-i+1, m-p-q, x12(i,i), 1_${ik}$,conjg(tauq2(& i)), x22(i,q+1), ldx22, work ) end do ! reduce columns p + 1, ..., m - q of x12, x22 do i = 1, m - p - q call stdlib${ii}$_${ci}$scal( m-p-q-i+1, cmplx( z2*z4, 0.0_${ck}$,KIND=${ck}$),x22(p+i,q+i), 1_${ik}$ ) call stdlib${ii}$_${ci}$larfgp( m-p-q-i+1, x22(p+i,q+i), x22(p+i+1,q+i), 1_${ik}$,tauq2(p+i) ) x22(p+i,q+i) = cone if ( m-p-q /= i ) then call stdlib${ii}$_${ci}$larf( 'L', m-p-q-i+1, m-p-q-i, x22(p+i,q+i), 1_${ik}$,conjg(tauq2(p+i)),& x22(p+i,q+i+1), ldx22,work ) end if end do end if return end subroutine stdlib${ii}$_${ci}$unbdb #:endif #:endfor module subroutine stdlib${ii}$_cunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! CUNBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, !! M-P, or M-Q. Routines CUNBDB2, CUNBDB3, and CUNBDB4 handle cases in !! which Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(sp), intent(out) :: phi(*), theta(*) complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(sp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < q .or. m-p < q ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. m-q < q ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-2 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB1', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., q of x11 and x21 do i = 1, q call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) theta(i) = atan2( real( x21(i,i),KIND=sp), real( x11(i,i),KIND=sp) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i) = cone x21(i,i) = cone call stdlib${ii}$_clarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) call stdlib${ii}$_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) if( i < q ) then call stdlib${ii}$_csrot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) call stdlib${ii}$_clacgv( q-i, x21(i,i+1), ldx21 ) call stdlib${ii}$_clarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) s = real( x21(i,i+1),KIND=sp) x21(i,i+1) = cone call stdlib${ii}$_clarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & ldx11, work(ilarf) ) call stdlib${ii}$_clarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & ldx21, work(ilarf) ) call stdlib${ii}$_clacgv( q-i, x21(i,i+1), ldx21 ) c = sqrt( stdlib${ii}$_scnrm2( p-i, x11(i+1,i+1), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_scnrm2( m-p-i, x21(i+& 1_${ik}$,i+1), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) call stdlib${ii}$_cunbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1_${ik}$,x21(i+1,i+1), 1_${ik}$, x11(i+1,& i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) end if end do return end subroutine stdlib${ii}$_cunbdb1 module subroutine stdlib${ii}$_zunbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, !! M-P, or M-Q. Routines ZUNBDB2, ZUNBDB3, and ZUNBDB4 handle cases in !! which Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(dp), intent(out) :: phi(*), theta(*) complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(dp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < q .or. m-p < q ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. m-q < q ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-2 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB1', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., q of x11 and x21 do i = 1, q call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) theta(i) = atan2( real( x21(i,i),KIND=dp), real( x11(i,i),KIND=dp) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i) = cone x21(i,i) = cone call stdlib${ii}$_zlarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) call stdlib${ii}$_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) if( i < q ) then call stdlib${ii}$_zdrot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) call stdlib${ii}$_zlacgv( q-i, x21(i,i+1), ldx21 ) call stdlib${ii}$_zlarfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) s = real( x21(i,i+1),KIND=dp) x21(i,i+1) = cone call stdlib${ii}$_zlarf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & ldx11, work(ilarf) ) call stdlib${ii}$_zlarf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & ldx21, work(ilarf) ) call stdlib${ii}$_zlacgv( q-i, x21(i,i+1), ldx21 ) c = sqrt( stdlib${ii}$_dznrm2( p-i, x11(i+1,i+1), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_dznrm2( m-p-i, x21(i+& 1_${ik}$,i+1), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) call stdlib${ii}$_zunbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1_${ik}$,x21(i+1,i+1), 1_${ik}$, x11(i+1,& i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) end if end do return end subroutine stdlib${ii}$_zunbdb1 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$unbdb1( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB1: simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, !! M-P, or M-Q. Routines ZUNBDB2, ZUNBDB3, and ZUNBDB4 handle cases in !! which Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(${ck}$), intent(out) :: phi(*), theta(*) complex(${ck}$), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(${ck}$) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < q .or. m-p < q ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. m-q < q ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-2 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB1', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., q of x11 and x21 do i = 1, q call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) theta(i) = atan2( real( x21(i,i),KIND=${ck}$), real( x11(i,i),KIND=${ck}$) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i) = cone x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) if( i < q ) then call stdlib${ii}$_${ci}$drot( q-i, x11(i,i+1), ldx11, x21(i,i+1), ldx21, c,s ) call stdlib${ii}$_${ci}$lacgv( q-i, x21(i,i+1), ldx21 ) call stdlib${ii}$_${ci}$larfgp( q-i, x21(i,i+1), x21(i,i+2), ldx21, tauq1(i) ) s = real( x21(i,i+1),KIND=${ck}$) x21(i,i+1) = cone call stdlib${ii}$_${ci}$larf( 'R', p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x11(i+1,i+1), & ldx11, work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'R', m-p-i, q-i, x21(i,i+1), ldx21, tauq1(i),x21(i+1,i+1), & ldx21, work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i, x21(i,i+1), ldx21 ) c = sqrt( stdlib${ii}$_${c2ri(ci)}$znrm2( p-i, x11(i+1,i+1), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i, x21(i+& 1_${ik}$,i+1), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) call stdlib${ii}$_${ci}$unbdb5( p-i, m-p-i, q-i-1, x11(i+1,i+1), 1_${ik}$,x21(i+1,i+1), 1_${ik}$, x11(i+1,& i+2), ldx11,x21(i+1,i+2), ldx21, work(iorbdb5), lorbdb5,childinfo ) end if end do return end subroutine stdlib${ii}$_${ci}$unbdb1 #:endif #:endfor module subroutine stdlib${ii}$_cunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! CUNBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, !! Q, or M-Q. Routines CUNBDB1, CUNBDB3, and CUNBDB4 handle cases in !! which P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are P-by-P bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(sp), intent(out) :: phi(*), theta(*) complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(sp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < 0_${ik}$ .or. p > m-p ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. q < p .or. m-q < p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB2', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., p of x11 and x21 do i = 1, p if( i > 1_${ik}$ ) then call stdlib${ii}$_csrot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) end if call stdlib${ii}$_clacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_clarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) c = real( x11(i,i),KIND=sp) x11(i,i) = cone call stdlib${ii}$_clarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_clarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & work(ilarf) ) call stdlib${ii}$_clacgv( q-i+1, x11(i,i), ldx11 ) s = sqrt( stdlib${ii}$_scnrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_scnrm2( m-p-i+1, x21(i,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_cunbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1_${ik}$, x21(i,i), 1_${ik}$,x11(i+1,i+1), & ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_cscal( p-i, cnegone, x11(i+1,i), 1_${ik}$ ) call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) if( i < p ) then call stdlib${ii}$_clarfgp( p-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, taup1(i) ) phi(i) = atan2( real( x11(i+1,i),KIND=sp), real( x21(i,i),KIND=sp) ) c = cos( phi(i) ) s = sin( phi(i) ) x11(i+1,i) = cone call stdlib${ii}$_clarf( 'L', p-i, q-i, x11(i+1,i), 1_${ik}$, conjg(taup1(i)),x11(i+1,i+1), & ldx11, work(ilarf) ) end if x21(i,i) = cone call stdlib${ii}$_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do ! reduce the bottom-right portion of x21 to the identity matrix do i = p + 1, q call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) x21(i,i) = cone call stdlib${ii}$_clarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do return end subroutine stdlib${ii}$_cunbdb2 module subroutine stdlib${ii}$_zunbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, !! Q, or M-Q. Routines ZUNBDB1, ZUNBDB3, and ZUNBDB4 handle cases in !! which P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are P-by-P bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(dp), intent(out) :: phi(*), theta(*) complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(dp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < 0_${ik}$ .or. p > m-p ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. q < p .or. m-q < p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB2', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., p of x11 and x21 do i = 1, p if( i > 1_${ik}$ ) then call stdlib${ii}$_zdrot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) end if call stdlib${ii}$_zlacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_zlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) c = real( x11(i,i),KIND=dp) x11(i,i) = cone call stdlib${ii}$_zlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_zlarf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & work(ilarf) ) call stdlib${ii}$_zlacgv( q-i+1, x11(i,i), ldx11 ) s = sqrt( stdlib${ii}$_dznrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_dznrm2( m-p-i+1, x21(i,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_zunbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1_${ik}$, x21(i,i), 1_${ik}$,x11(i+1,i+1), & ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_zscal( p-i, cnegone, x11(i+1,i), 1_${ik}$ ) call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) if( i < p ) then call stdlib${ii}$_zlarfgp( p-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, taup1(i) ) phi(i) = atan2( real( x11(i+1,i),KIND=dp), real( x21(i,i),KIND=dp) ) c = cos( phi(i) ) s = sin( phi(i) ) x11(i+1,i) = cone call stdlib${ii}$_zlarf( 'L', p-i, q-i, x11(i+1,i), 1_${ik}$, conjg(taup1(i)),x11(i+1,i+1), & ldx11, work(ilarf) ) end if x21(i,i) = cone call stdlib${ii}$_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do ! reduce the bottom-right portion of x21 to the identity matrix do i = p + 1, q call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) x21(i,i) = cone call stdlib${ii}$_zlarf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do return end subroutine stdlib${ii}$_zunbdb2 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$unbdb2( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB2: simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, !! Q, or M-Q. Routines ZUNBDB1, ZUNBDB3, and ZUNBDB4 handle cases in !! which P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are P-by-P bidiagonal matrices represented implicitly by !! angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(${ck}$), intent(out) :: phi(*), theta(*) complex(${ck}$), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(${ck}$) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < 0_${ik}$ .or. p > m-p ) then info = -2_${ik}$ else if( q < 0_${ik}$ .or. q < p .or. m-q < p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p-1, m-p, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB2', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., p of x11 and x21 do i = 1, p if( i > 1_${ik}$ ) then call stdlib${ii}$_${ci}$drot( q-i+1, x11(i,i), ldx11, x21(i-1,i), ldx21, c,s ) end if call stdlib${ii}$_${ci}$lacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_${ci}$larfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) c = real( x11(i,i),KIND=${ck}$) x11(i,i) = cone call stdlib${ii}$_${ci}$larf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'R', m-p-i+1, q-i+1, x11(i,i), ldx11, tauq1(i),x21(i,i), ldx21, & work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x11(i,i), ldx11 ) s = sqrt( stdlib${ii}$_${c2ri(ci)}$znrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i+1, x21(i,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_${ci}$unbdb5( p-i, m-p-i+1, q-i, x11(i+1,i), 1_${ik}$, x21(i,i), 1_${ik}$,x11(i+1,i+1), & ldx11, x21(i,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_${ci}$scal( p-i, cnegone, x11(i+1,i), 1_${ik}$ ) call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) if( i < p ) then call stdlib${ii}$_${ci}$larfgp( p-i, x11(i+1,i), x11(i+2,i), 1_${ik}$, taup1(i) ) phi(i) = atan2( real( x11(i+1,i),KIND=${ck}$), real( x21(i,i),KIND=${ck}$) ) c = cos( phi(i) ) s = sin( phi(i) ) x11(i+1,i) = cone call stdlib${ii}$_${ci}$larf( 'L', p-i, q-i, x11(i+1,i), 1_${ik}$, conjg(taup1(i)),x11(i+1,i+1), & ldx11, work(ilarf) ) end if x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do ! reduce the bottom-right portion of x21 to the identity matrix do i = p + 1, q call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i), x21(i+1,i), 1_${ik}$, taup2(i) ) x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, q-i, x21(i,i), 1_${ik}$, conjg(taup2(i)),x21(i,i+1), & ldx21, work(ilarf) ) end do return end subroutine stdlib${ii}$_${ci}$unbdb2 #:endif #:endfor module subroutine stdlib${ii}$_cunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! CUNBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, !! Q, or M-Q. Routines CUNBDB1, CUNBDB2, and CUNBDB4 handle cases in !! which M-P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented !! implicitly by angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(sp), intent(out) :: phi(*), theta(*) complex(sp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(sp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( 2_${ik}$*p < m .or. p > m ) then info = -2_${ik}$ else if( q < m-p .or. m-q < m-p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB3', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., m-p of x11 and x21 do i = 1, m-p if( i > 1_${ik}$ ) then call stdlib${ii}$_csrot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) end if call stdlib${ii}$_clacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_clarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) s = real( x21(i,i),KIND=sp) x21(i,i) = cone call stdlib${ii}$_clarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & work(ilarf) ) call stdlib${ii}$_clarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_clacgv( q-i+1, x21(i,i), ldx21 ) c = sqrt( stdlib${ii}$_scnrm2( p-i+1, x11(i,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_scnrm2( m-p-i, x21(i+1,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_cunbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1_${ik}$, x21(i+1,i), 1_${ik}$,x11(i,i+1), & ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) if( i < m-p ) then call stdlib${ii}$_clarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1_${ik}$, taup2(i) ) phi(i) = atan2( real( x21(i+1,i),KIND=sp), real( x11(i,i),KIND=sp) ) c = cos( phi(i) ) s = sin( phi(i) ) x21(i+1,i) = cone call stdlib${ii}$_clarf( 'L', m-p-i, q-i, x21(i+1,i), 1_${ik}$, conjg(taup2(i)),x21(i+1,i+1), & ldx21, work(ilarf) ) end if x11(i,i) = cone call stdlib${ii}$_clarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do ! reduce the bottom-right portion of x11 to the identity matrix do i = m-p + 1, q call stdlib${ii}$_clarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) x11(i,i) = cone call stdlib${ii}$_clarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do return end subroutine stdlib${ii}$_cunbdb3 module subroutine stdlib${ii}$_zunbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, !! Q, or M-Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB4 handle cases in !! which M-P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented !! implicitly by angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(dp), intent(out) :: phi(*), theta(*) complex(dp), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(dp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( 2_${ik}$*p < m .or. p > m ) then info = -2_${ik}$ else if( q < m-p .or. m-q < m-p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB3', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., m-p of x11 and x21 do i = 1, m-p if( i > 1_${ik}$ ) then call stdlib${ii}$_zdrot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) end if call stdlib${ii}$_zlacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_zlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) s = real( x21(i,i),KIND=dp) x21(i,i) = cone call stdlib${ii}$_zlarf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & work(ilarf) ) call stdlib${ii}$_zlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_zlacgv( q-i+1, x21(i,i), ldx21 ) c = sqrt( stdlib${ii}$_dznrm2( p-i+1, x11(i,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_dznrm2( m-p-i, x21(i+1,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_zunbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1_${ik}$, x21(i+1,i), 1_${ik}$,x11(i,i+1), & ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) if( i < m-p ) then call stdlib${ii}$_zlarfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1_${ik}$, taup2(i) ) phi(i) = atan2( real( x21(i+1,i),KIND=dp), real( x11(i,i),KIND=dp) ) c = cos( phi(i) ) s = sin( phi(i) ) x21(i+1,i) = cone call stdlib${ii}$_zlarf( 'L', m-p-i, q-i, x21(i+1,i), 1_${ik}$,conjg(taup2(i)), x21(i+1,i+1), & ldx21,work(ilarf) ) end if x11(i,i) = cone call stdlib${ii}$_zlarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do ! reduce the bottom-right portion of x11 to the identity matrix do i = m-p + 1, q call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) x11(i,i) = cone call stdlib${ii}$_zlarf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do return end subroutine stdlib${ii}$_zunbdb3 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$unbdb3( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB3: simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, !! Q, or M-Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB4 handle cases in !! which M-P is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented !! implicitly by angles THETA, PHI. work, lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(${ck}$), intent(out) :: phi(*), theta(*) complex(${ck}$), intent(out) :: taup1(*), taup2(*), tauq1(*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(${ck}$) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( 2_${ik}$*p < m .or. p > m ) then info = -2_${ik}$ else if( q < m-p .or. m-q < m-p ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( p, m-p-1, q-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q-1 lworkopt = max( ilarf+llarf-1, iorbdb5+lorbdb5-1 ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB3', -info ) return else if( lquery ) then return end if ! reduce rows 1, ..., m-p of x11 and x21 do i = 1, m-p if( i > 1_${ik}$ ) then call stdlib${ii}$_${ci}$drot( q-i+1, x11(i-1,i), ldx11, x21(i,i), ldx11, c,s ) end if call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_${ci}$larfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) s = real( x21(i,i),KIND=${ck}$) x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'R', p-i+1, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i,i), ldx11, & work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(i,i), ldx21 ) c = sqrt( stdlib${ii}$_${c2ri(ci)}$znrm2( p-i+1, x11(i,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i, x21(i+1,i), & 1_${ik}$ )**2_${ik}$ ) theta(i) = atan2( s, c ) call stdlib${ii}$_${ci}$unbdb5( p-i+1, m-p-i, q-i, x11(i,i), 1_${ik}$, x21(i+1,i), 1_${ik}$,x11(i,i+1), & ldx11, x21(i+1,i+1), ldx21,work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) if( i < m-p ) then call stdlib${ii}$_${ci}$larfgp( m-p-i, x21(i+1,i), x21(i+2,i), 1_${ik}$, taup2(i) ) phi(i) = atan2( real( x21(i+1,i),KIND=${ck}$), real( x11(i,i),KIND=${ck}$) ) c = cos( phi(i) ) s = sin( phi(i) ) x21(i+1,i) = cone call stdlib${ii}$_${ci}$larf( 'L', m-p-i, q-i, x21(i+1,i), 1_${ik}$,conjg(taup2(i)), x21(i+1,i+1), & ldx21,work(ilarf) ) end if x11(i,i) = cone call stdlib${ii}$_${ci}$larf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do ! reduce the bottom-right portion of x11 to the identity matrix do i = m-p + 1, q call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i), x11(i+1,i), 1_${ik}$, taup1(i) ) x11(i,i) = cone call stdlib${ii}$_${ci}$larf( 'L', p-i+1, q-i, x11(i,i), 1_${ik}$, conjg(taup1(i)),x11(i,i+1), ldx11, & work(ilarf) ) end do return end subroutine stdlib${ii}$_${ci}$unbdb3 #:endif #:endfor module subroutine stdlib${ii}$_cunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! CUNBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, !! M-P, or Q. Routines CUNBDB1, CUNBDB2, and CUNBDB3 handle cases in !! which M-Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented !! implicitly by angles THETA, PHI. phantom, work, lwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(sp), intent(out) :: phi(*), theta(*) complex(sp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) complex(sp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(sp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < m-q .or. m-p < m-q ) then info = -2_${ik}$ else if( q < m-q .or. q > m ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( q-1, p-1, m-p-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q lworkopt = ilarf + llarf - 1_${ik}$ lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1_${ik}$ ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB4', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., m-q of x11 and x21 do i = 1, m-q if( i == 1_${ik}$ ) then do j = 1, m phantom(j) = czero end do call stdlib${ii}$_cunbdb5( p, m-p, q, phantom(1_${ik}$), 1_${ik}$, phantom(p+1), 1_${ik}$,x11, ldx11, x21, & ldx21, work(iorbdb5),lorbdb5, childinfo ) call stdlib${ii}$_cscal( p, cnegone, phantom(1_${ik}$), 1_${ik}$ ) call stdlib${ii}$_clarfgp( p, phantom(1_${ik}$), phantom(2_${ik}$), 1_${ik}$, taup1(1_${ik}$) ) call stdlib${ii}$_clarfgp( m-p, phantom(p+1), phantom(p+2), 1_${ik}$, taup2(1_${ik}$) ) theta(i) = atan2( real( phantom(1_${ik}$),KIND=sp), real( phantom(p+1),KIND=sp) ) c = cos( theta(i) ) s = sin( theta(i) ) phantom(1_${ik}$) = cone phantom(p+1) = cone call stdlib${ii}$_clarf( 'L', p, q, phantom(1_${ik}$), 1_${ik}$, conjg(taup1(1_${ik}$)), x11,ldx11, work(& ilarf) ) call stdlib${ii}$_clarf( 'L', m-p, q, phantom(p+1), 1_${ik}$, conjg(taup2(1_${ik}$)),x21, ldx21, & work(ilarf) ) else call stdlib${ii}$_cunbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,x21(i,i-1), 1_${ik}$, x11(i,i)& , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_cscal( p-i+1, cnegone, x11(i,i-1), 1_${ik}$ ) call stdlib${ii}$_clarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1_${ik}$, taup1(i) ) call stdlib${ii}$_clarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1_${ik}$,taup2(i) ) theta(i) = atan2( real( x11(i,i-1),KIND=sp), real( x21(i,i-1),KIND=sp) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i-1) = cone x21(i,i-1) = cone call stdlib${ii}$_clarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,conjg(taup1(i)), x11(i,i), & ldx11, work(ilarf) ) call stdlib${ii}$_clarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1_${ik}$,conjg(taup2(i)), x21(i,i), & ldx21, work(ilarf) ) end if call stdlib${ii}$_csrot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) call stdlib${ii}$_clacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_clarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) c = real( x21(i,i),KIND=sp) x21(i,i) = cone call stdlib${ii}$_clarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_clarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_clacgv( q-i+1, x21(i,i), ldx21 ) if( i < m-q ) then s = sqrt( stdlib${ii}$_scnrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_scnrm2( m-p-i, x21(i+1,& i), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) end if end do ! reduce the bottom-right portion of x11 to [ i 0 ] do i = m - q + 1, p call stdlib${ii}$_clacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_clarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) x11(i,i) = cone call stdlib${ii}$_clarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_clarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_clacgv( q-i+1, x11(i,i), ldx11 ) end do ! reduce the bottom-right portion of x21 to [ 0 i ] do i = p + 1, q call stdlib${ii}$_clacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) call stdlib${ii}$_clarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) x21(m-q+i-p,i) = cone call stdlib${ii}$_clarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& , ldx21, work(ilarf) ) call stdlib${ii}$_clacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) end do return end subroutine stdlib${ii}$_cunbdb4 module subroutine stdlib${ii}$_zunbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, !! M-P, or Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB3 handle cases in !! which M-Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented !! implicitly by angles THETA, PHI. phantom, work, lwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(dp), intent(out) :: phi(*), theta(*) complex(dp), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) complex(dp), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(dp) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < m-q .or. m-p < m-q ) then info = -2_${ik}$ else if( q < m-q .or. q > m ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( q-1, p-1, m-p-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q lworkopt = ilarf + llarf - 1_${ik}$ lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1_${ik}$ ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB4', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., m-q of x11 and x21 do i = 1, m-q if( i == 1_${ik}$ ) then do j = 1, m phantom(j) = czero end do call stdlib${ii}$_zunbdb5( p, m-p, q, phantom(1_${ik}$), 1_${ik}$, phantom(p+1), 1_${ik}$,x11, ldx11, x21, & ldx21, work(iorbdb5),lorbdb5, childinfo ) call stdlib${ii}$_zscal( p, cnegone, phantom(1_${ik}$), 1_${ik}$ ) call stdlib${ii}$_zlarfgp( p, phantom(1_${ik}$), phantom(2_${ik}$), 1_${ik}$, taup1(1_${ik}$) ) call stdlib${ii}$_zlarfgp( m-p, phantom(p+1), phantom(p+2), 1_${ik}$, taup2(1_${ik}$) ) theta(i) = atan2( real( phantom(1_${ik}$),KIND=dp), real( phantom(p+1),KIND=dp) ) c = cos( theta(i) ) s = sin( theta(i) ) phantom(1_${ik}$) = cone phantom(p+1) = cone call stdlib${ii}$_zlarf( 'L', p, q, phantom(1_${ik}$), 1_${ik}$, conjg(taup1(1_${ik}$)), x11,ldx11, work(& ilarf) ) call stdlib${ii}$_zlarf( 'L', m-p, q, phantom(p+1), 1_${ik}$, conjg(taup2(1_${ik}$)),x21, ldx21, & work(ilarf) ) else call stdlib${ii}$_zunbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,x21(i,i-1), 1_${ik}$, x11(i,i)& , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_zscal( p-i+1, cnegone, x11(i,i-1), 1_${ik}$ ) call stdlib${ii}$_zlarfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1_${ik}$, taup1(i) ) call stdlib${ii}$_zlarfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1_${ik}$,taup2(i) ) theta(i) = atan2( real( x11(i,i-1),KIND=dp), real( x21(i,i-1),KIND=dp) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i-1) = cone x21(i,i-1) = cone call stdlib${ii}$_zlarf( 'L', p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,conjg(taup1(i)), x11(i,i), & ldx11, work(ilarf) ) call stdlib${ii}$_zlarf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1_${ik}$,conjg(taup2(i)), x21(i,i), & ldx21, work(ilarf) ) end if call stdlib${ii}$_zdrot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) call stdlib${ii}$_zlacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_zlarfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) c = real( x21(i,i),KIND=dp) x21(i,i) = cone call stdlib${ii}$_zlarf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_zlarf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_zlacgv( q-i+1, x21(i,i), ldx21 ) if( i < m-q ) then s = sqrt( stdlib${ii}$_dznrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_dznrm2( m-p-i, x21(i+1,& i), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) end if end do ! reduce the bottom-right portion of x11 to [ i 0 ] do i = m - q + 1, p call stdlib${ii}$_zlacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_zlarfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) x11(i,i) = cone call stdlib${ii}$_zlarf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_zlarf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_zlacgv( q-i+1, x11(i,i), ldx11 ) end do ! reduce the bottom-right portion of x21 to [ 0 i ] do i = p + 1, q call stdlib${ii}$_zlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) call stdlib${ii}$_zlarfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) x21(m-q+i-p,i) = cone call stdlib${ii}$_zlarf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& , ldx21, work(ilarf) ) call stdlib${ii}$_zlacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) end do return end subroutine stdlib${ii}$_zunbdb4 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] module subroutine stdlib${ii}$_${ci}$unbdb4( m, p, q, x11, ldx11, x21, ldx21, theta, phi,taup1, taup2, tauq1, & !! ZUNBDB4: simultaneously bidiagonalizes the blocks of a tall and skinny !! matrix X with orthonomal columns: !! [ B11 ] !! [ X11 ] [ P1 | ] [ 0 ] !! [-----] = [---------] [-----] Q1**T . !! [ X21 ] [ | P2 ] [ B21 ] !! [ 0 ] !! X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, !! M-P, or Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB3 handle cases in !! which M-Q is not the minimum dimension. !! The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), !! and (M-Q)-by-(M-Q), respectively. They are represented implicitly by !! Householder vectors. !! B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented !! implicitly by angles THETA, PHI. phantom, work, lwork,info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: lwork, m, p, q, ldx11, ldx21 ! Array Arguments real(${ck}$), intent(out) :: phi(*), theta(*) complex(${ck}$), intent(out) :: phantom(*), taup1(*), taup2(*), tauq1(*), work(*) complex(${ck}$), intent(inout) :: x11(ldx11,*), x21(ldx21,*) ! ==================================================================== ! Local Scalars real(${ck}$) :: c, s integer(${ik}$) :: childinfo, i, ilarf, iorbdb5, j, llarf, lorbdb5, lworkmin, & lworkopt logical(lk) :: lquery ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ if( m < 0_${ik}$ ) then info = -1_${ik}$ else if( p < m-q .or. m-p < m-q ) then info = -2_${ik}$ else if( q < m-q .or. q > m ) then info = -3_${ik}$ else if( ldx11 < max( 1_${ik}$, p ) ) then info = -5_${ik}$ else if( ldx21 < max( 1_${ik}$, m-p ) ) then info = -7_${ik}$ end if ! compute workspace if( info == 0_${ik}$ ) then ilarf = 2_${ik}$ llarf = max( q-1, p-1, m-p-1 ) iorbdb5 = 2_${ik}$ lorbdb5 = q lworkopt = ilarf + llarf - 1_${ik}$ lworkopt = max( lworkopt, iorbdb5 + lorbdb5 - 1_${ik}$ ) lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not.lquery ) then info = -14_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB4', -info ) return else if( lquery ) then return end if ! reduce columns 1, ..., m-q of x11 and x21 do i = 1, m-q if( i == 1_${ik}$ ) then do j = 1, m phantom(j) = czero end do call stdlib${ii}$_${ci}$unbdb5( p, m-p, q, phantom(1_${ik}$), 1_${ik}$, phantom(p+1), 1_${ik}$,x11, ldx11, x21, & ldx21, work(iorbdb5),lorbdb5, childinfo ) call stdlib${ii}$_${ci}$scal( p, cnegone, phantom(1_${ik}$), 1_${ik}$ ) call stdlib${ii}$_${ci}$larfgp( p, phantom(1_${ik}$), phantom(2_${ik}$), 1_${ik}$, taup1(1_${ik}$) ) call stdlib${ii}$_${ci}$larfgp( m-p, phantom(p+1), phantom(p+2), 1_${ik}$, taup2(1_${ik}$) ) theta(i) = atan2( real( phantom(1_${ik}$),KIND=${ck}$), real( phantom(p+1),KIND=${ck}$) ) c = cos( theta(i) ) s = sin( theta(i) ) phantom(1_${ik}$) = cone phantom(p+1) = cone call stdlib${ii}$_${ci}$larf( 'L', p, q, phantom(1_${ik}$), 1_${ik}$, conjg(taup1(1_${ik}$)), x11,ldx11, work(& ilarf) ) call stdlib${ii}$_${ci}$larf( 'L', m-p, q, phantom(p+1), 1_${ik}$, conjg(taup2(1_${ik}$)),x21, ldx21, & work(ilarf) ) else call stdlib${ii}$_${ci}$unbdb5( p-i+1, m-p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,x21(i,i-1), 1_${ik}$, x11(i,i)& , ldx11, x21(i,i),ldx21, work(iorbdb5), lorbdb5, childinfo ) call stdlib${ii}$_${ci}$scal( p-i+1, cnegone, x11(i,i-1), 1_${ik}$ ) call stdlib${ii}$_${ci}$larfgp( p-i+1, x11(i,i-1), x11(i+1,i-1), 1_${ik}$, taup1(i) ) call stdlib${ii}$_${ci}$larfgp( m-p-i+1, x21(i,i-1), x21(i+1,i-1), 1_${ik}$,taup2(i) ) theta(i) = atan2( real( x11(i,i-1),KIND=${ck}$), real( x21(i,i-1),KIND=${ck}$) ) c = cos( theta(i) ) s = sin( theta(i) ) x11(i,i-1) = cone x21(i,i-1) = cone call stdlib${ii}$_${ci}$larf( 'L', p-i+1, q-i+1, x11(i,i-1), 1_${ik}$,conjg(taup1(i)), x11(i,i), & ldx11, work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'L', m-p-i+1, q-i+1, x21(i,i-1), 1_${ik}$,conjg(taup2(i)), x21(i,i), & ldx21, work(ilarf) ) end if call stdlib${ii}$_${ci}$drot( q-i+1, x11(i,i), ldx11, x21(i,i), ldx21, s, -c ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(i,i), ldx21 ) call stdlib${ii}$_${ci}$larfgp( q-i+1, x21(i,i), x21(i,i+1), ldx21, tauq1(i) ) c = real( x21(i,i),KIND=${ck}$) x21(i,i) = cone call stdlib${ii}$_${ci}$larf( 'R', p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'R', m-p-i, q-i+1, x21(i,i), ldx21, tauq1(i),x21(i+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(i,i), ldx21 ) if( i < m-q ) then s = sqrt( stdlib${ii}$_${c2ri(ci)}$znrm2( p-i, x11(i+1,i), 1_${ik}$ )**2_${ik}$+ stdlib${ii}$_${c2ri(ci)}$znrm2( m-p-i, x21(i+1,& i), 1_${ik}$ )**2_${ik}$ ) phi(i) = atan2( s, c ) end if end do ! reduce the bottom-right portion of x11 to [ i 0 ] do i = m - q + 1, p call stdlib${ii}$_${ci}$lacgv( q-i+1, x11(i,i), ldx11 ) call stdlib${ii}$_${ci}$larfgp( q-i+1, x11(i,i), x11(i,i+1), ldx11, tauq1(i) ) x11(i,i) = cone call stdlib${ii}$_${ci}$larf( 'R', p-i, q-i+1, x11(i,i), ldx11, tauq1(i),x11(i+1,i), ldx11, & work(ilarf) ) call stdlib${ii}$_${ci}$larf( 'R', q-p, q-i+1, x11(i,i), ldx11, tauq1(i),x21(m-q+1,i), ldx21, & work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x11(i,i), ldx11 ) end do ! reduce the bottom-right portion of x21 to [ 0 i ] do i = p + 1, q call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) call stdlib${ii}$_${ci}$larfgp( q-i+1, x21(m-q+i-p,i), x21(m-q+i-p,i+1), ldx21,tauq1(i) ) x21(m-q+i-p,i) = cone call stdlib${ii}$_${ci}$larf( 'R', q-i, q-i+1, x21(m-q+i-p,i), ldx21, tauq1(i),x21(m-q+i-p+1,i)& , ldx21, work(ilarf) ) call stdlib${ii}$_${ci}$lacgv( q-i+1, x21(m-q+i-p,i), ldx21 ) end do return end subroutine stdlib${ii}$_${ci}$unbdb4 #:endif #:endfor pure module subroutine stdlib${ii}$_cunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & !! CUNBDB5 orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then some other vector from the orthogonal complement !! is returned. This vector is chosen in an arbitrary but deterministic !! way. lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(sp), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(sp), intent(out) :: work(*) complex(sp), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, j ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB5', -info ) return end if ! project x onto the orthogonal complement of q call stdlib${ii}$_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & childinfo ) ! if the projection is nonzero, then return if( stdlib${ii}$_scnrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_scnrm2(m2,x2,incx2) /= czero ) & then return end if ! project each standard basis vector e_1,...,e_m1 in turn, stopping ! when a nonzero projection is found do i = 1, m1 do j = 1, m1 x1(j) = czero end do x1(i) = cone do j = 1, m2 x2(j) = czero end do call stdlib${ii}$_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_scnrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_scnrm2(m2,x2,incx2) /= czero ) & then return end if end do ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, ! stopping when a nonzero projection is found do i = 1, m2 do j = 1, m1 x1(j) = czero end do do j = 1, m2 x2(j) = czero end do x2(i) = cone call stdlib${ii}$_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_scnrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_scnrm2(m2,x2,incx2) /= czero ) & then return end if end do return end subroutine stdlib${ii}$_cunbdb5 pure module subroutine stdlib${ii}$_zunbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & !! ZUNBDB5 orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then some other vector from the orthogonal complement !! is returned. This vector is chosen in an arbitrary but deterministic !! way. lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(dp), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(dp), intent(out) :: work(*) complex(dp), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, j ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB5', -info ) return end if ! project x onto the orthogonal complement of q call stdlib${ii}$_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & childinfo ) ! if the projection is nonzero, then return if( stdlib${ii}$_dznrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_dznrm2(m2,x2,incx2) /= czero ) & then return end if ! project each standard basis vector e_1,...,e_m1 in turn, stopping ! when a nonzero projection is found do i = 1, m1 do j = 1, m1 x1(j) = czero end do x1(i) = cone do j = 1, m2 x2(j) = czero end do call stdlib${ii}$_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_dznrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_dznrm2(m2,x2,incx2) /= czero ) & then return end if end do ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, ! stopping when a nonzero projection is found do i = 1, m2 do j = 1, m1 x1(j) = czero end do do j = 1, m2 x2(j) = czero end do x2(i) = cone call stdlib${ii}$_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_dznrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_dznrm2(m2,x2,incx2) /= czero ) & then return end if end do return end subroutine stdlib${ii}$_zunbdb5 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$unbdb5( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & !! ZUNBDB5: orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then some other vector from the orthogonal complement !! is returned. This vector is chosen in an arbitrary but deterministic !! way. lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(${ck}$), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(${ck}$), intent(out) :: work(*) complex(${ck}$), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Local Scalars integer(${ik}$) :: childinfo, i, j ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB5', -info ) return end if ! project x onto the orthogonal complement of q call stdlib${ii}$_${ci}$unbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2, ldq2,work, lwork, & childinfo ) ! if the projection is nonzero, then return if( stdlib${ii}$_${c2ri(ci)}$znrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_${c2ri(ci)}$znrm2(m2,x2,incx2) /= czero ) & then return end if ! project each standard basis vector e_1,...,e_m1 in turn, stopping ! when a nonzero projection is found do i = 1, m1 do j = 1, m1 x1(j) = czero end do x1(i) = cone do j = 1, m2 x2(j) = czero end do call stdlib${ii}$_${ci}$unbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_${c2ri(ci)}$znrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_${c2ri(ci)}$znrm2(m2,x2,incx2) /= czero ) & then return end if end do ! project each standard basis vector e_(m1+1),...,e_(m1+m2) in turn, ! stopping when a nonzero projection is found do i = 1, m2 do j = 1, m1 x1(j) = czero end do do j = 1, m2 x2(j) = czero end do x2(i) = cone call stdlib${ii}$_${ci}$unbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & lwork, childinfo ) if( stdlib${ii}$_${c2ri(ci)}$znrm2(m1,x1,incx1) /= czero.or. stdlib${ii}$_${c2ri(ci)}$znrm2(m2,x2,incx2) /= czero ) & then return end if end do return end subroutine stdlib${ii}$_${ci}$unbdb5 #:endif #:endfor pure module subroutine stdlib${ii}$_cunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, lwork, info ) !! CUNBDB6 orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then the zero vector is returned. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(sp), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(sp), intent(out) :: work(*) complex(sp), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Parameters real(sp), parameter :: alphasq = 0.01_sp ! Local Scalars integer(${ik}$) :: i real(sp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CUNBDB6', -info ) return end if ! first, project x onto the orthogonal complement of q's column ! space scl1 = zero ssq1 = one call stdlib${ii}$_classq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_classq( m2, x2, incx2, scl2, ssq2 ) normsq1 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_cgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_cgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_cgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_cgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_classq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_classq( m2, x2, incx2, scl2, ssq2 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if projection is sufficiently large in norm, then stop. ! if projection is czero, then stop. ! otherwise, project again. if( normsq2 >= alphasq*normsq1 ) then return end if if( normsq2 == czero ) then return end if normsq1 = normsq2 do i = 1, n work(i) = czero end do if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_cgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_cgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_cgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_cgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_classq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_classq( m1, x1, incx1, scl1, ssq1 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if second projection is sufficiently large in norm, then do ! nothing more. alternatively, if it shrunk significantly, then ! truncate it to czero. if( normsq2 < alphasq*normsq1 ) then do i = 1, m1 x1(i) = czero end do do i = 1, m2 x2(i) = czero end do end if return end subroutine stdlib${ii}$_cunbdb6 pure module subroutine stdlib${ii}$_zunbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & !! ZUNBDB6 orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then the zero vector is returned. lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(dp), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(dp), intent(out) :: work(*) complex(dp), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Parameters real(dp), parameter :: alphasq = 0.01_dp ! Local Scalars integer(${ik}$) :: i real(dp) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB6', -info ) return end if ! first, project x onto the orthogonal complement of q's column ! space scl1 = zero ssq1 = one call stdlib${ii}$_zlassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_zlassq( m2, x2, incx2, scl2, ssq2 ) normsq1 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_zgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_zgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_zgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_zgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_zlassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_zlassq( m2, x2, incx2, scl2, ssq2 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if projection is sufficiently large in norm, then stop. ! if projection is czero, then stop. ! otherwise, project again. if( normsq2 >= alphasq*normsq1 ) then return end if if( normsq2 == czero ) then return end if normsq1 = normsq2 do i = 1, n work(i) = czero end do if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_zgemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_zgemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_zgemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_zgemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_zlassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_zlassq( m1, x1, incx1, scl1, ssq1 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if second projection is sufficiently large in norm, then do ! nothing more. alternatively, if it shrunk significantly, then ! truncate it to czero. if( normsq2 < alphasq*normsq1 ) then do i = 1, m1 x1(i) = czero end do do i = 1, m2 x2(i) = czero end do end if return end subroutine stdlib${ii}$_zunbdb6 #:for ck,ct,ci in CMPLX_KINDS_TYPES #:if not ck in ["sp","dp"] pure module subroutine stdlib${ii}$_${ci}$unbdb6( m1, m2, n, x1, incx1, x2, incx2, q1, ldq1, q2,ldq2, work, & !! ZUNBDB6: orthogonalizes the column vector !! X = [ X1 ] !! [ X2 ] !! with respect to the columns of !! Q = [ Q1 ] . !! [ Q2 ] !! The columns of Q must be orthonormal. !! If the projection is zero according to Kahan's "twice is enough" !! criterion, then the zero vector is returned. lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${ck}$, only: negone, zero, half, one, two, three, four, eight, ten, czero, chalf, cone, cnegone ! Scalar Arguments integer(${ik}$), intent(in) :: incx1, incx2, ldq1, ldq2, lwork, m1, m2, n integer(${ik}$), intent(out) :: info ! Array Arguments complex(${ck}$), intent(in) :: q1(ldq1,*), q2(ldq2,*) complex(${ck}$), intent(out) :: work(*) complex(${ck}$), intent(inout) :: x1(*), x2(*) ! ===================================================================== ! Parameters real(${ck}$), parameter :: alphasq = 0.01_${ck}$ ! Local Scalars integer(${ik}$) :: i real(${ck}$) :: normsq1, normsq2, scl1, scl2, ssq1, ssq2 ! Intrinsic Function ! Executable Statements ! test input arguments info = 0_${ik}$ if( m1 < 0_${ik}$ ) then info = -1_${ik}$ else if( m2 < 0_${ik}$ ) then info = -2_${ik}$ else if( n < 0_${ik}$ ) then info = -3_${ik}$ else if( incx1 < 1_${ik}$ ) then info = -5_${ik}$ else if( incx2 < 1_${ik}$ ) then info = -7_${ik}$ else if( ldq1 < max( 1_${ik}$, m1 ) ) then info = -9_${ik}$ else if( ldq2 < max( 1_${ik}$, m2 ) ) then info = -11_${ik}$ else if( lwork < n ) then info = -13_${ik}$ end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'ZUNBDB6', -info ) return end if ! first, project x onto the orthogonal complement of q's column ! space scl1 = zero ssq1 = one call stdlib${ii}$_${ci}$lassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_${ci}$lassq( m2, x2, incx2, scl2, ssq2 ) normsq1 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_${ci}$gemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_${ci}$gemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_${ci}$gemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_${ci}$gemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_${ci}$lassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_${ci}$lassq( m2, x2, incx2, scl2, ssq2 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if projection is sufficiently large in norm, then stop. ! if projection is czero, then stop. ! otherwise, project again. if( normsq2 >= alphasq*normsq1 ) then return end if if( normsq2 == czero ) then return end if normsq1 = normsq2 do i = 1, n work(i) = czero end do if( m1 == 0_${ik}$ ) then do i = 1, n work(i) = czero end do else call stdlib${ii}$_${ci}$gemv( 'C', m1, n, cone, q1, ldq1, x1, incx1, czero, work,1_${ik}$ ) end if call stdlib${ii}$_${ci}$gemv( 'C', m2, n, cone, q2, ldq2, x2, incx2, cone, work, 1_${ik}$ ) call stdlib${ii}$_${ci}$gemv( 'N', m1, n, cnegone, q1, ldq1, work, 1_${ik}$, cone, x1,incx1 ) call stdlib${ii}$_${ci}$gemv( 'N', m2, n, cnegone, q2, ldq2, work, 1_${ik}$, cone, x2,incx2 ) scl1 = zero ssq1 = one call stdlib${ii}$_${ci}$lassq( m1, x1, incx1, scl1, ssq1 ) scl2 = zero ssq2 = one call stdlib${ii}$_${ci}$lassq( m1, x1, incx1, scl1, ssq1 ) normsq2 = scl1**2_${ik}$*ssq1 + scl2**2_${ik}$*ssq2 ! if second projection is sufficiently large in norm, then do ! nothing more. alternatively, if it shrunk significantly, then ! truncate it to czero. if( normsq2 < alphasq*normsq1 ) then do i = 1, m1 x1(i) = czero end do do i = 1, m2 x2(i) = czero end do end if return end subroutine stdlib${ii}$_${ci}$unbdb6 #:endif #:endfor #:endfor end submodule stdlib_lapack_cosine_sine