#:include "common.fypp" submodule(stdlib_lapack_eig_svd_lsq) stdlib_lapack_cosine_sine implicit none contains #:for ik,it,ii in LINALG_INT_KINDS_TYPES pure module subroutine stdlib${ii}$_sbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! SBBCSD computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See SORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(sp), intent(inout) :: phi(*), theta(*) real(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(sp), parameter :: hundred = 100.0_sp real(sp), parameter :: meighth = -0.125_sp real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'SBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'EPSILON' ) unfl = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_slartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_slartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_slartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_slartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_slartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_slartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_slartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_slartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_slartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_slartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_slartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_slasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_slasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_slasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_slasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_slasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_sscal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_sscal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_sscal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_sscal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_sscal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_sscal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_sscal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_sscal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_sswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_sswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_sswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_sswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_sswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_sswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_sswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_sswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_sbbcsd pure module subroutine stdlib${ii}$_dbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! DBBCSD computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See DORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_dp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(dp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(dp), intent(inout) :: phi(*), theta(*) real(dp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(dp), parameter :: hundred = 100.0_dp real(dp), parameter :: meighth = -0.125_dp real(dp), parameter :: piover2 = 1.57079632679489661923132169163975144210_dp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(dp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_dlamch( 'EPSILON' ) unfl = stdlib${ii}$_dlamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_dlartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_dlartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_dlartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_dlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_dlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_dlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_dlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_dlartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_dlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_dlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_dlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_dlartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_dlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_dlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_dlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_dlartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_dlartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_dlasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_dlasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_dlasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_dscal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_dscal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_dscal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_dscal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_dscal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_dscal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_dscal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_dscal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_dswap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_dswap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_dswap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_dswap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_dswap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_dswap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_dswap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_dswap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_dbbcsd #:for rk,rt,ri in REAL_KINDS_TYPES #:if not rk in ["sp","dp"] pure module subroutine stdlib${ii}$_${ri}$bbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & !! DBBCSD: computes the CS decomposition of an orthogonal matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See DORCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The orthogonal matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, work, & lwork, info ) ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_${rk}$, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q ! Array Arguments real(${rk}$), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), work(*) real(${rk}$), intent(inout) :: phi(*), theta(*) real(${rk}$), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(${rk}$), parameter :: hundred = 100.0_${rk}$ real(${rk}$), parameter :: meighth = -0.125_${rk}$ real(${rk}$), parameter :: piover2 = 1.57079632679489661923132169163975144210_${rk}$ ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lworkmin, lworkopt, maxit, mini real(${rk}$) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lworkmin = 1_${ik}$ work(1_${ik}$) = lworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lworkopt = iv2tsn + q - 1_${ik}$ lworkmin = lworkopt work(1_${ik}$) = lworkopt if( lwork < lworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'DBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_${ri}$lamch( 'EPSILON' ) unfl = stdlib${ii}$_${ri}$lamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_${ri}$las2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_${ri}$las2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11d(imin), b11e(imin), mu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b21d(imin), b21e(imin), nu,work(iv1tcs+imin-1), work(iv1tsn+& imin-1) ) end if temp = work(iv1tcs+imin-1)*b11d(imin) +work(iv1tsn+imin-1)*b11e(imin) b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -work(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = work(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1) temp = work(iv1tcs+imin-1)*b21d(imin) +work(iv1tsn+imin-1)*b21e(imin) b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -work(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = work(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),work(iu1cs+imin-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b12d( imin ), b12e( imin ), nu,work(iu1cs+imin-1), work(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),work(iu2cs+imin-1),& r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,work(iu2cs+imin-1), work(& iu2sn+imin-1) ) else call stdlib${ii}$_${ri}$lartgs( b22d(imin), b22e(imin), mu,work(iu2cs+imin-1), work(iu2sn+& imin-1) ) end if work(iu2cs+imin-1) = -work(iu2cs+imin-1) work(iu2sn+imin-1) = -work(iu2sn+imin-1) temp = work(iu1cs+imin-1)*b11e(imin) +work(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -work(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = work(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1) end if temp = work(iu1cs+imin-1)*b12d(imin) +work(iu1sn+imin-1)*b12e(imin) b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -work(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = work(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1) temp = work(iu2cs+imin-1)*b21e(imin) +work(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -work(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = work(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1) end if temp = work(iu2cs+imin-1)*b22d(imin) +work(iu2sn+imin-1)*b22e(imin) b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -work(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = work(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_${ri}$lartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),work(iv1tcs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),work(iv1tsn+i-1) ) end if work(iv1tcs+i-1) = -work(iv1tcs+i-1) work(iv1tsn+i-1) = -work(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iv2tsn+i-1-1),work(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),work(iv2tcs+i-1-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) else call stdlib${ii}$_${ri}$lartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),work(iv2tsn+i-& 1_${ik}$-1) ) end if temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i) b11e(i) = work(iv1tcs+i-1)*b11e(i) -work(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = work(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1) temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i) b21e(i) = work(iv1tcs+i-1)*b21e(i) -work(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = work(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1) temp = work(iv2tcs+i-1-1)*b12e(i-1) +work(iv2tsn+i-1-1)*b12d(i) b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -work(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = work(iv2tsn+i-1-1)*b12e(i) b12e(i) = work(iv2tcs+i-1-1)*b12e(i) temp = work(iv2tcs+i-1-1)*b22e(i-1) +work(iv2tsn+i-1-1)*b22d(i) b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -work(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = work(iv2tsn+i-1-1)*b22e(i) b22e(i) = work(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_${ri}$lartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_${ri}$lartgp( b11bulge, b11d(i), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),work(iu1cs+i-1), r ) else if( mu <= nu ) then call stdlib${ii}$_${ri}$lartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),work(iu1sn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),work(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b21bulge, b21d(i), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),work(iu2cs+i-1), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),work(iu2sn+i-1) ) else call stdlib${ii}$_${ri}$lartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),work(iu2sn+i-1) ) end if work(iu2cs+i-1) = -work(iu2cs+i-1) work(iu2sn+i-1) = -work(iu2sn+i-1) temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1) b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -work(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = work(iu1sn+i-1)*b11e(i+1) b11e(i+1) = work(iu1cs+i-1)*b11e(i+1) end if temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1) b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -work(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = work(iu2sn+i-1)*b21e(i+1) b21e(i+1) = work(iu2cs+i-1)*b21e(i+1) end if temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i) b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = work(iu1sn+i-1)*b12d(i+1) b12d(i+1) = work(iu1cs+i-1)*b12d(i+1) temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i) b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = work(iu2sn+i-1)*b22d(i+1) b22d(i+1) = work(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( y2, y1, work(iv2tsn+imax-1-1),work(iv2tcs+imax-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_${ri}$lartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),work(iv2tcs+& imax-1-1), r ) else if( nu < mu ) then call stdlib${ii}$_${ri}$lartgs( b12e(imax-1), b12d(imax), nu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) else call stdlib${ii}$_${ri}$lartgs( b22e(imax-1), b22d(imax), mu,work(iv2tcs+imax-1-1), work(& iv2tsn+imax-1-1) ) end if temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +work(iv2tsn+imax-1-1)*b12d(imax) b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -work(iv2tsn+imax-1-1)*b12e(imax-1) b12e(imax-1) = temp temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +work(iv2tsn+imax-1-1)*b22d(imax) b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -work(iv2tsn+imax-1-1)*b22e(imax-1) b22e(imax-1) = temp ! update singular vectors if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', p, imax-imin+1,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(1_${ik}$,imin), ldu1 ) else call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, p,work(iu1cs+imin-1), work(& iu1sn+imin-1),u1(imin,1_${ik}$), ldu1 ) end if end if if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', m-p, imax-imin+1,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(1_${ik}$,imin), ldu2 ) else call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, m-p,work(iu2cs+imin-1), work(& iu2sn+imin-1),u2(imin,1_${ik}$), ldu2 ) end if end if if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, q,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(imin,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', q, imax-imin+1,work(iv1tcs+imin-1), work(& iv1tsn+imin-1),v1t(1_${ik}$,imin), ldv1t ) end if end if if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ri}$lasr( 'L', 'V', 'F', imax-imin+1, m-q,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(imin,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ri}$lasr( 'R', 'V', 'F', m-q, imax-imin+1,work(iv2tcs+imin-1), work(& iv2tsn+imin-1),v2t(1_${ik}$,imin), ldv2t ) end if end if ! fix signs on b11(imax-1,imax) and b21(imax-1,imax) if( b11e(imax-1)+b21e(imax-1) > 0_${ik}$ ) then b11d(imax) = -b11d(imax) b21d(imax) = -b21d(imax) if( wantv1t ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( q, negone, v1t(imax,1_${ik}$), ldv1t ) else call stdlib${ii}$_${ri}$scal( q, negone, v1t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! compute theta(imax) x1 = cos(phi(imax-1))*b11d(imax) +sin(phi(imax-1))*b12e(imax-1) y1 = cos(phi(imax-1))*b21d(imax) +sin(phi(imax-1))*b22e(imax-1) theta(imax) = atan2( abs(y1), abs(x1) ) ! fix signs on b11(imax,imax), b12(imax,imax-1), b21(imax,imax), ! and b22(imax,imax-1) if( b11d(imax)+b12e(imax-1) < 0_${ik}$ ) then b12d(imax) = -b12d(imax) if( wantu1 ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( p, negone, u1(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ri}$scal( p, negone, u1(imax,1_${ik}$), ldu1 ) end if end if end if if( b21d(imax)+b22e(imax-1) > 0_${ik}$ ) then b22d(imax) = -b22d(imax) if( wantu2 ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( m-p, negone, u2(1_${ik}$,imax), 1_${ik}$ ) else call stdlib${ii}$_${ri}$scal( m-p, negone, u2(imax,1_${ik}$), ldu2 ) end if end if end if ! fix signs on b12(imax,imax) and b22(imax,imax) if( b12d(imax)+b22d(imax) < 0_${ik}$ ) then if( wantv2t ) then if( colmajor ) then call stdlib${ii}$_${ri}$scal( m-q, negone, v2t(imax,1_${ik}$), ldv2t ) else call stdlib${ii}$_${ri}$scal( m-q, negone, v2t(1_${ik}$,imax), 1_${ik}$ ) end if end if end if ! test for negligible sines or cosines do i = imin, imax if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = imin, imax-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! deflate if (imax > 1_${ik}$) then do while( phi(imax-1) == zero ) imax = imax - 1_${ik}$ if (imax <= 1) exit end do end if if( imin > imax - 1_${ik}$ )imin = imax - 1_${ik}$ if (imin > 1_${ik}$) then do while (phi(imin-1) /= zero) imin = imin - 1_${ik}$ if (imin <= 1) exit end do end if ! repeat main iteration loop end do ! postprocessing: order theta from least to greatest do i = 1, q mini = i thetamin = theta(i) do j = i+1, q if( theta(j) < thetamin ) then mini = j thetamin = theta(j) end if end do if( mini /= i ) then theta(mini) = theta(i) theta(i) = thetamin if( colmajor ) then if( wantu1 )call stdlib${ii}$_${ri}$swap( p, u1(1_${ik}$,i), 1_${ik}$, u1(1_${ik}$,mini), 1_${ik}$ ) if( wantu2 )call stdlib${ii}$_${ri}$swap( m-p, u2(1_${ik}$,i), 1_${ik}$, u2(1_${ik}$,mini), 1_${ik}$ ) if( wantv1t )call stdlib${ii}$_${ri}$swap( q, v1t(i,1_${ik}$), ldv1t, v1t(mini,1_${ik}$), ldv1t ) if( wantv2t )call stdlib${ii}$_${ri}$swap( m-q, v2t(i,1_${ik}$), ldv2t, v2t(mini,1_${ik}$),ldv2t ) else if( wantu1 )call stdlib${ii}$_${ri}$swap( p, u1(i,1_${ik}$), ldu1, u1(mini,1_${ik}$), ldu1 ) if( wantu2 )call stdlib${ii}$_${ri}$swap( m-p, u2(i,1_${ik}$), ldu2, u2(mini,1_${ik}$), ldu2 ) if( wantv1t )call stdlib${ii}$_${ri}$swap( q, v1t(1_${ik}$,i), 1_${ik}$, v1t(1_${ik}$,mini), 1_${ik}$ ) if( wantv2t )call stdlib${ii}$_${ri}$swap( m-q, v2t(1_${ik}$,i), 1_${ik}$, v2t(1_${ik}$,mini), 1_${ik}$ ) end if end if end do return end subroutine stdlib${ii}$_${ri}$bbcsd #:endif #:endfor pure module subroutine stdlib${ii}$_cbbcsd( jobu1, jobu2, jobv1t, jobv2t, trans, m, p, q,theta, phi, u1, & ldu1, u2, ldu2, v1t, ldv1t,v2t, ldv2t, b11d, b11e, b12d, b12e, b21d, b21e,b22d, b22e, rwork, & lrwork, info ) !! CBBCSD computes the CS decomposition of a unitary matrix in !! bidiagonal-block form, !! [ B11 | B12 0 0 ] !! [ 0 | 0 -I 0 ] !! X = [----------------] !! [ B21 | B22 0 0 ] !! [ 0 | 0 0 I ] !! [ C | -S 0 0 ] !! [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H !! = [---------] [---------------] [---------] . !! [ | U2 ] [ S | C 0 0 ] [ | V2 ] !! [ 0 | 0 0 I ] !! X is M-by-M, its top-left block is P-by-Q, and Q must be no larger !! than P, M-P, or M-Q. (If Q is not the smallest index, then X must be !! transposed and/or permuted. This can be done in constant time using !! the TRANS and SIGNS options. See CUNCSD for details.) !! The bidiagonal matrices B11, B12, B21, and B22 are represented !! implicitly by angles THETA(1:Q) and PHI(1:Q-1). !! The unitary matrices U1, U2, V1T, and V2T are input/output. !! The input matrices are pre- or post-multiplied by the appropriate !! singular vector matrices. ! -- lapack computational routine -- ! -- lapack is a software package provided by univ. of tennessee, -- ! -- univ. of california berkeley, univ. of colorado denver and nag ltd..-- use stdlib_blas_constants_sp, only: negone, zero, one, ten, cnegone ! Scalar Arguments character, intent(in) :: jobu1, jobu2, jobv1t, jobv2t, trans integer(${ik}$), intent(out) :: info integer(${ik}$), intent(in) :: ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q ! Array Arguments real(sp), intent(out) :: b11d(*), b11e(*), b12d(*), b12e(*), b21d(*), b21e(*), b22d(*),& b22e(*), rwork(*) real(sp), intent(inout) :: phi(*), theta(*) complex(sp), intent(inout) :: u1(ldu1,*), u2(ldu2,*), v1t(ldv1t,*), v2t(ldv2t,*) ! =================================================================== ! Parameters integer(${ik}$), parameter :: maxitr = 6_${ik}$ real(sp), parameter :: hundred = 100.0_sp real(sp), parameter :: meighth = -0.125_sp real(sp), parameter :: piover2 = 1.57079632679489661923132169163975144210_sp ! Local Scalars logical(lk) :: colmajor, lquery, restart11, restart12, restart21, restart22, wantu1, & wantu2, wantv1t, wantv2t integer(${ik}$) :: i, imin, imax, iter, iu1cs, iu1sn, iu2cs, iu2sn, iv1tcs, iv1tsn, & iv2tcs, iv2tsn, j, lrworkmin, lrworkopt, maxit, mini real(sp) :: b11bulge, b12bulge, b21bulge, b22bulge, dummy, eps, mu, nu, r, sigma11, & sigma21, temp, thetamax, thetamin, thresh, tol, tolmul, unfl, x1, x2, y1, y2 ! Intrinsic Functions ! Executable Statements ! test input arguments info = 0_${ik}$ lquery = lrwork == -1_${ik}$ wantu1 = stdlib_lsame( jobu1, 'Y' ) wantu2 = stdlib_lsame( jobu2, 'Y' ) wantv1t = stdlib_lsame( jobv1t, 'Y' ) wantv2t = stdlib_lsame( jobv2t, 'Y' ) colmajor = .not. stdlib_lsame( trans, 'T' ) if( m < 0_${ik}$ ) then info = -6_${ik}$ else if( p < 0_${ik}$ .or. p > m ) then info = -7_${ik}$ else if( q < 0_${ik}$ .or. q > m ) then info = -8_${ik}$ else if( q > p .or. q > m-p .or. q > m-q ) then info = -8_${ik}$ else if( wantu1 .and. ldu1 < p ) then info = -12_${ik}$ else if( wantu2 .and. ldu2 < m-p ) then info = -14_${ik}$ else if( wantv1t .and. ldv1t < q ) then info = -16_${ik}$ else if( wantv2t .and. ldv2t < m-q ) then info = -18_${ik}$ end if ! quick return if q = 0 if( info == 0_${ik}$ .and. q == 0_${ik}$ ) then lrworkmin = 1_${ik}$ rwork(1_${ik}$) = lrworkmin return end if ! compute workspace if( info == 0_${ik}$ ) then iu1cs = 1_${ik}$ iu1sn = iu1cs + q iu2cs = iu1sn + q iu2sn = iu2cs + q iv1tcs = iu2sn + q iv1tsn = iv1tcs + q iv2tcs = iv1tsn + q iv2tsn = iv2tcs + q lrworkopt = iv2tsn + q - 1_${ik}$ lrworkmin = lrworkopt rwork(1_${ik}$) = lrworkopt if( lrwork < lrworkmin .and. .not. lquery ) then info = -28_${ik}$ end if end if if( info /= 0_${ik}$ ) then call stdlib${ii}$_xerbla( 'CBBCSD', -info ) return else if( lquery ) then return end if ! get machine constants eps = stdlib${ii}$_slamch( 'EPSILON' ) unfl = stdlib${ii}$_slamch( 'SAFE MINIMUM' ) tolmul = max( ten, min( hundred, eps**meighth ) ) tol = tolmul*eps thresh = max( tol, maxitr*q*q*unfl ) ! test for negligible sines or cosines do i = 1, q if( theta(i) < thresh ) then theta(i) = zero else if( theta(i) > piover2-thresh ) then theta(i) = piover2 end if end do do i = 1, q-1 if( phi(i) < thresh ) then phi(i) = zero else if( phi(i) > piover2-thresh ) then phi(i) = piover2 end if end do ! initial deflation imax = q do while( imax > 1 ) if( phi(imax-1) /= zero ) then exit end if imax = imax - 1_${ik}$ end do imin = imax - 1_${ik}$ if ( imin > 1_${ik}$ ) then do while( phi(imin-1) /= zero ) imin = imin - 1_${ik}$ if ( imin <= 1 ) exit end do end if ! initialize iteration counter maxit = maxitr*q*q iter = 0_${ik}$ ! begin main iteration loop do while( imax > 1 ) ! compute the matrix entries b11d(imin) = cos( theta(imin) ) b21d(imin) = -sin( theta(imin) ) do i = imin, imax - 1 b11e(i) = -sin( theta(i) ) * sin( phi(i) ) b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) ) b12d(i) = sin( theta(i) ) * cos( phi(i) ) b12e(i) = cos( theta(i+1) ) * sin( phi(i) ) b21e(i) = -cos( theta(i) ) * sin( phi(i) ) b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) ) b22d(i) = cos( theta(i) ) * cos( phi(i) ) b22e(i) = -sin( theta(i+1) ) * sin( phi(i) ) end do b12d(imax) = sin( theta(imax) ) b22d(imax) = cos( theta(imax) ) ! abort if not converging; otherwise, increment iter if( iter > maxit ) then info = 0_${ik}$ do i = 1, q if( phi(i) /= zero )info = info + 1_${ik}$ end do return end if iter = iter + imax - imin ! compute shifts thetamax = theta(imin) thetamin = theta(imin) do i = imin+1, imax if( theta(i) > thetamax )thetamax = theta(i) if( theta(i) < thetamin )thetamin = theta(i) end do if( thetamax > piover2 - thresh ) then ! zero on diagonals of b11 and b22; induce deflation with a ! zero shift mu = zero nu = one else if( thetamin < thresh ) then ! zero on diagonals of b12 and b22; induce deflation with a ! zero shift mu = one nu = zero else ! compute shifts for b11 and b21 and use the lesser call stdlib${ii}$_slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,dummy ) call stdlib${ii}$_slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,dummy ) if( sigma11 <= sigma21 ) then mu = sigma11 nu = sqrt( one - mu**2_${ik}$ ) if( mu < thresh ) then mu = zero nu = one end if else nu = sigma21 mu = sqrt( one - nu**2_${ik}$ ) if( nu < thresh ) then mu = one nu = zero end if end if end if ! rotate to produce bulges in b11 and b21 if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(imin), b11e(imin), mu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) else call stdlib${ii}$_slartgs( b21d(imin), b21e(imin), nu,rwork(iv1tcs+imin-1), rwork(& iv1tsn+imin-1) ) end if temp = rwork(iv1tcs+imin-1)*b11d(imin) +rwork(iv1tsn+imin-1)*b11e(imin) b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -rwork(iv1tsn+imin-1)*b11d(imin) b11d(imin) = temp b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1) temp = rwork(iv1tcs+imin-1)*b21d(imin) +rwork(iv1tsn+imin-1)*b21e(imin) b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -rwork(iv1tsn+imin-1)*b21d(imin) b21d(imin) = temp b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1) ! compute theta(imin) theta( imin ) = atan2( sqrt( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ ),sqrt( b11d(imin)**2_${ik}$+& b11bulge**2_${ik}$ ) ) ! chase the bulges in b11(imin+1,imin) and b21(imin+1,imin) if( b11d(imin)**2_${ik}$+b11bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),rwork(iu1cs+imin-& 1_${ik}$), r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e( imin ), b11d( imin + 1_${ik}$ ), mu,rwork(iu1cs+imin-1), & rwork(iu1sn+imin-1) ) else call stdlib${ii}$_slartgs( b12d( imin ), b12e( imin ), nu,rwork(iu1cs+imin-1), rwork(& iu1sn+imin-1) ) end if if( b21d(imin)**2_${ik}$+b21bulge**2_${ik}$ > thresh**2_${ik}$ ) then call stdlib${ii}$_slartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),rwork(iu2cs+imin-& 1_${ik}$), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e( imin ), b21d( imin + 1_${ik}$ ), nu,rwork(iu2cs+imin-1), & rwork(iu2sn+imin-1) ) else call stdlib${ii}$_slartgs( b22d(imin), b22e(imin), mu,rwork(iu2cs+imin-1), rwork(iu2sn+& imin-1) ) end if rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1) rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1) temp = rwork(iu1cs+imin-1)*b11e(imin) +rwork(iu1sn+imin-1)*b11d(imin+1) b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -rwork(iu1sn+imin-1)*b11e(imin) b11e(imin) = temp if( imax > imin+1 ) then b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1) b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1) end if temp = rwork(iu1cs+imin-1)*b12d(imin) +rwork(iu1sn+imin-1)*b12e(imin) b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -rwork(iu1sn+imin-1)*b12d(imin) b12d(imin) = temp b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1) b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1) temp = rwork(iu2cs+imin-1)*b21e(imin) +rwork(iu2sn+imin-1)*b21d(imin+1) b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -rwork(iu2sn+imin-1)*b21e(imin) b21e(imin) = temp if( imax > imin+1 ) then b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1) b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1) end if temp = rwork(iu2cs+imin-1)*b22d(imin) +rwork(iu2sn+imin-1)*b22e(imin) b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -rwork(iu2sn+imin-1)*b22d(imin) b22d(imin) = temp b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1) b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1) ! inner loop: chase bulges from b11(imin,imin+2), ! b12(imin,imin+1), b21(imin,imin+2), and b22(imin,imin+1) to ! bottom-right do i = imin+1, imax-1 ! compute phi(i-1) x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1) x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1) y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge phi(i-1) = atan2( sqrt(x1**2_${ik}$+x2**2_${ik}$), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11e(i-1)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21e(i-1)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12d(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i-1,i+1), b12(i-1,i), ! b21(i-1,i+1), and b22(i-1,i). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( x2, x1, rwork(iv1tsn+i-1),rwork(iv1tcs+i-1), r ) else if( .not. restart11 .and. restart21 ) then call stdlib${ii}$_slartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( restart11 .and. .not. restart21 ) then call stdlib${ii}$_slartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),rwork(iv1tcs+i-1),& r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) else call stdlib${ii}$_slartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),rwork(iv1tsn+i-1)& ) end if rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1) rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1) if( .not. restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-1-1), r ) else if( .not. restart12 .and. restart22 ) then call stdlib${ii}$_slartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( restart12 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),rwork(iv2tcs+i-& 1_${ik}$-1), r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b12e(i-1), b12d(i), nu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) else call stdlib${ii}$_slartgs( b22e(i-1), b22d(i), mu,rwork(iv2tcs+i-1-1), rwork(iv2tsn+& i-1-1) ) end if temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i) b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -rwork(iv1tsn+i-1)*b11d(i) b11d(i) = temp b11bulge = rwork(iv1tsn+i-1)*b11d(i+1) b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1) temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i) b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -rwork(iv1tsn+i-1)*b21d(i) b21d(i) = temp b21bulge = rwork(iv1tsn+i-1)*b21d(i+1) b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1) temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +rwork(iv2tsn+i-1-1)*b12d(i) b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -rwork(iv2tsn+i-1-1)*b12e(i-1) b12e(i-1) = temp b12bulge = rwork(iv2tsn+i-1-1)*b12e(i) b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i) temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +rwork(iv2tsn+i-1-1)*b22d(i) b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -rwork(iv2tsn+i-1-1)*b22e(i-1) b22e(i-1) = temp b22bulge = rwork(iv2tsn+i-1-1)*b22e(i) b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i) ! compute theta(i) x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1) x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1) y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge theta(i) = atan2( sqrt(y1**2_${ik}$+y2**2_${ik}$), sqrt(x1**2_${ik}$+x2**2_${ik}$) ) ! determine if there are bulges to chase or if a new direct ! summand has been reached restart11 = b11d(i)**2_${ik}$ + b11bulge**2_${ik}$ <= thresh**2_${ik}$ restart12 = b12e(i-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart21 = b21d(i)**2_${ik}$ + b21bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22e(i-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ ! if possible, chase bulges from b11(i+1,i), b12(i+1,i-1), ! b21(i+1,i), and b22(i+1,i-1). if necessary, restart bulge- ! chasing by applying the original shift again. if( .not. restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),r ) else if( .not. restart11 .and. restart12 ) then call stdlib${ii}$_slartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),rwork(iu1cs+i-1), r ) else if( restart11 .and. .not. restart12 ) then call stdlib${ii}$_slartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),rwork(iu1cs+i-1), & r ) else if( mu <= nu ) then call stdlib${ii}$_slartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),rwork(iu1sn+i-1)& ) else call stdlib${ii}$_slartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),rwork(iu1sn+i-1) ) end if if( .not. restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),r ) else if( .not. restart21 .and. restart22 ) then call stdlib${ii}$_slartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),rwork(iu2cs+i-1), r ) else if( restart21 .and. .not. restart22 ) then call stdlib${ii}$_slartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),rwork(iu2cs+i-1), & r ) else if( nu < mu ) then call stdlib${ii}$_slartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),rwork(iu2sn+i-1)& ) else call stdlib${ii}$_slartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),rwork(iu2sn+i-1) ) end if rwork(iu2cs+i-1) = -rwork(iu2cs+i-1) rwork(iu2sn+i-1) = -rwork(iu2sn+i-1) temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1) b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -rwork(iu1sn+i-1)*b11e(i) b11e(i) = temp if( i < imax - 1_${ik}$ ) then b11bulge = rwork(iu1sn+i-1)*b11e(i+1) b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1) end if temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1) b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -rwork(iu2sn+i-1)*b21e(i) b21e(i) = temp if( i < imax - 1_${ik}$ ) then b21bulge = rwork(iu2sn+i-1)*b21e(i+1) b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1) end if temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i) b12e(i) = rwork(iu1cs+i-1)*b12e(i) -rwork(iu1sn+i-1)*b12d(i) b12d(i) = temp b12bulge = rwork(iu1sn+i-1)*b12d(i+1) b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1) temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i) b22e(i) = rwork(iu2cs+i-1)*b22e(i) -rwork(iu2sn+i-1)*b22d(i) b22d(i) = temp b22bulge = rwork(iu2sn+i-1)*b22d(i+1) b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1) end do ! compute phi(imax-1) x1 = sin(theta(imax-1))*b11e(imax-1) +cos(theta(imax-1))*b21e(imax-1) y1 = sin(theta(imax-1))*b12d(imax-1) +cos(theta(imax-1))*b22d(imax-1) y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge phi(imax-1) = atan2( abs(x1), sqrt(y1**2_${ik}$+y2**2_${ik}$) ) ! chase bulges from b12(imax-1,imax) and b22(imax-1,imax) restart12 = b12d(imax-1)**2_${ik}$ + b12bulge**2_${ik}$ <= thresh**2_${ik}$ restart22 = b22d(imax-1)**2_${ik}$ + b22bulge**2_${ik}$ <= thresh**2_${ik}$ if( .not. restart12