#:include "common.fypp" #:set RC_KINDS_TYPES = REAL_KINDS_TYPES + CMPLX_KINDS_TYPES submodule (stdlib_linalg) stdlib_linalg_qr use stdlib_linalg_constants use stdlib_linalg_lapack, only: geqrf, geqp3, orgqr, ungqr use stdlib_linalg_lapack_aux, only: handle_geqrf_info, handle_orgqr_info, handle_geqp3_info use stdlib_linalg_state, only: linalg_state_type, linalg_error_handling, LINALG_ERROR, & LINALG_INTERNAL_ERROR, LINALG_VALUE_ERROR implicit none character(*), parameter :: this = 'qr' contains ! Check problem size and evaluate whether full/reduced problem is requested pure subroutine check_problem_size(m,n,q1,q2,r1,r2,err,reduced) integer(ilp), intent(in) :: m,n,q1,q2,r1,r2 type(linalg_state_type), intent(out) :: err logical, intent(out) :: reduced integer(ilp) :: k k = min(m,n) reduced = .false. ! Check sizes if (m<1 .or. n<1) then err = linalg_state_type(this,LINALG_VALUE_ERROR,'invalid matrix size: a(m,n)=',[m,n]) else ! Check if we should operate on reduced full QR ! - Reduced: shape(Q)==[m,k], shape(R)==[k,n] ! - Full : shape(Q)==[m,m], shape(R)==[m,n] if (all([q1,q2]==[m,k] .and. [r1,r2]==[k,n])) then reduced = .true. elseif (all([q1,q2]==[m,m] .and. [r1,r2]==[m,n])) then reduced = .false. else err = linalg_state_type(this,LINALG_VALUE_ERROR,'with a=',[m,n],'q=',[q1,q2],'r=',[r1,r2], & 'problem is neither full (q=',[m,m],'r=',[m,n],') nor reduced (q=',[m,m],'r=',[m,n],')') endif end if end subroutine check_problem_size #:for rk,rt,ri in RC_KINDS_TYPES ! Get workspace size for QR operations pure module subroutine get_qr_${ri}$_workspace(a,lwork,err) !> Input matrix a[m,n] ${rt}$, intent(in), target :: a(:,:) !> Minimum workspace size for both operations integer(ilp), intent(out) :: lwork !> State return flag. Returns an error if the query failed type(linalg_state_type), optional, intent(out) :: err integer(ilp) :: m,n,k,info,lwork_qr,lwork_ord ${rt}$ :: work_dummy(1),tau_dummy(1),a_dummy(1,1) type(linalg_state_type) :: err0 lwork = -1_ilp !> Problem sizes m = size(a,1,kind=ilp) n = size(a,2,kind=ilp) k = min(m,n) ! QR space lwork_qr = -1_ilp call geqrf(m,n,a_dummy,m,tau_dummy,work_dummy,lwork_qr,info) call handle_geqrf_info(this,info,m,n,lwork_qr,err0) if (err0%error()) then call linalg_error_handling(err0,err) return endif lwork_qr = ceiling(real(work_dummy(1),kind=${rk}$),kind=ilp) ! Ordering space (for full factorization) lwork_ord = -1_ilp call #{if rt.startswith('complex')}# ungqr #{else}# orgqr #{endif}# & (m,m,k,a_dummy,m,tau_dummy,work_dummy,lwork_ord,info) call handle_orgqr_info(this,info,m,n,k,lwork_ord,err0) if (err0%error()) then call linalg_error_handling(err0,err) return endif lwork_ord = ceiling(real(work_dummy(1),kind=${rk}$),kind=ilp) ! Pick the largest size, so two operations can be performed with the same allocation lwork = max(lwork_qr, lwork_ord) end subroutine get_qr_${ri}$_workspace ! Compute the solution to a real system of linear equations A * X = B pure module subroutine stdlib_linalg_${ri}$_qr(a,q,r,overwrite_a,storage,err) !> Input matrix a[m,n] ${rt}$, intent(inout), target :: a(:,:) !> Orthogonal matrix Q ([m,m], or [m,k] if reduced) ${rt}$, intent(out), contiguous, target :: q(:,:) !> Upper triangular matrix R ([m,n], or [k,n] if reduced) ${rt}$, intent(out), contiguous, target :: r(:,:) !> [optional] Can A data be overwritten and destroyed? logical(lk), optional, intent(in) :: overwrite_a !> [optional] Provide pre-allocated workspace, size to be checked with qr_space ${rt}$, intent(out), optional, target :: storage(:) !> [optional] state return flag. On error if not requested, the code will stop type(linalg_state_type), optional, intent(out) :: err !> Local variables type(linalg_state_type) :: err0 integer(ilp) :: i,j,m,n,k,q1,q2,r1,r2,lda,lwork,info logical(lk) :: overwrite_a_,use_q_matrix,reduced ${rt}$ :: r11 ${rt}$, parameter :: zero = 0.0_${rk}$ ${rt}$, pointer :: amat(:,:),tau(:),work(:) !> Problem sizes m = size(a,1,kind=ilp) n = size(a,2,kind=ilp) k = min(m,n) q1 = size(q,1,kind=ilp) q2 = size(q,2,kind=ilp) r1 = size(r,1,kind=ilp) r2 = size(r,2,kind=ilp) ! Check if we should operate on reduced full QR call check_problem_size(m,n,q1,q2,r1,r2,err0,reduced) if (err0%error()) then call linalg_error_handling(err0,err) return end if ! Check if Q can be used as temporary storage for A, ! to be destroyed by *GEQRF use_q_matrix = q1>=m .and. q2>=n ! Can A be overwritten? By default, do not overwrite overwrite_a_ = .false._lk if (present(overwrite_a) .and. .not.use_q_matrix) overwrite_a_ = overwrite_a ! Initialize a matrix temporary, or reuse available ! storage if possible if (use_q_matrix) then amat => q q(:m,:n) = a elseif (overwrite_a_) then amat => a else allocate(amat(m,n),source=a) endif lda = size(amat,1,kind=ilp) ! To store the elementary reflectors, we need a [1:k] column. if (.not.use_q_matrix) then ! Q is not being used as the storage matrix tau(1:k) => q(1:k,1) else ! R has unused contiguous storage in the 1st column, except for the ! diagonal element. So, use the full column and store it in a dummy variable tau(1:k) => r(1:k,1) endif ! Retrieve workspace size call get_qr_${ri}$_workspace(a,lwork,err0) if (err0%ok()) then if (present(storage)) then work => storage else allocate(work(lwork)) endif if (.not.size(work,kind=ilp)>=lwork) then err0 = linalg_state_type(this,LINALG_ERROR,'insufficient workspace: should be at least ',lwork) call linalg_error_handling(err0,err) return endif ! Compute factorization. call geqrf(m,n,amat,m,tau,work,lwork,info) call handle_geqrf_info(this,info,m,n,lwork,err0) if (err0%ok()) then ! Get R matrix out before overwritten. ! Do not copy the first column at this stage: it may be being used by `tau` r11 = amat(1,1) forall(i=1:min(r1,m),j=2:n) r(i,j) = merge(amat(i,j),zero,i<=j) ! Convert K elementary reflectors tau(1:k) -> orthogonal matrix Q call #{if rt.startswith('complex')}# ungqr #{else}# orgqr #{endif}# & (q1,q2,k,amat,lda,tau,work,lwork,info) call handle_orgqr_info(this,info,m,n,k,lwork,err0) ! Copy result back to Q if (.not.use_q_matrix) q = amat(:q1,:q2) ! Copy first column of R r(1,1) = r11 r(2:r1,1) = zero ! Ensure last m-n rows of R are zeros, ! if full matrices were provided if (.not.reduced) r(k+1:m,1:n) = zero endif if (.not.present(storage)) deallocate(work) endif if (.not.(use_q_matrix.or.overwrite_a_)) deallocate(amat) ! Process output and return call linalg_error_handling(err0,err) end subroutine stdlib_linalg_${ri}$_qr #:endfor !--------------------------------------------------------- !----- QR decomposition with column pivoting ----- !--------------------------------------------------------- #:for rk, rt, ri in RC_KINDS_TYPES ! Get workspace size for QR operations pure module subroutine get_pivoting_qr_${ri}$_workspace(a,lwork,pivoting,err) !> Input matrix a[m,n] ${rt}$, intent(in), target :: a(:,:) !> Minimum workspace size for both operations integer(ilp), intent(out) :: lwork !> Pivoting flag. logical(lk), intent(in) :: pivoting !> State return flag. Returns an error if the query failed type(linalg_state_type), optional, intent(out) :: err integer(ilp) :: m,n,k,info,lwork_qr,lwork_ord ${rt}$ :: work_dummy(1),tau_dummy(1),a_dummy(1,1) integer(ilp) :: jpvt_dummy(1) real(${rk}$) :: rwork_dummy(1) type(linalg_state_type) :: err0 if (pivoting) then lwork = -1_ilp !> Problem sizes m = size(a,1,kind=ilp) n = size(a,2,kind=ilp) k = min(m,n) ! QR space lwork_qr = -1_ilp #:if rt.startswith('complex') call geqp3(m, n, a_dummy, m, jpvt_dummy, tau_dummy, work_dummy, lwork_qr, rwork_dummy, info) #:else call geqp3(m, n, a_dummy, m, jpvt_dummy, tau_dummy, work_dummy, lwork_qr, info) #:endif call handle_geqp3_info(this, info, m, n, lwork_qr, err0) if (err0%error()) then call linalg_error_handling(err0,err) return endif lwork_qr = ceiling(real(work_dummy(1),kind=${rk}$),kind=ilp) ! Ordering space (for full factorization) lwork_ord = -1_ilp call #{if rt.startswith('complex')}# ungqr #{else}# orgqr #{endif}# & (m,k,k,a_dummy,m,tau_dummy,work_dummy,lwork_ord,info) call handle_orgqr_info(this,info,m,n,k,lwork_ord,err0) if (err0%error()) then call linalg_error_handling(err0,err) return endif lwork_ord = ceiling(real(work_dummy(1),kind=${rk}$),kind=ilp) ! Pick the largest size, so two operations can be performed with the same allocation lwork = max(lwork_qr, lwork_ord) else call qr_space(a, lwork, err) endif end subroutine get_pivoting_qr_${ri}$_workspace pure module subroutine stdlib_linalg_${ri}$_pivoting_qr(a, q, r, pivots, overwrite_a, storage, err) !> Input matrix a[m, n] ${rt}$, intent(inout), target :: a(:, :) !> Orthogonal matrix Q ([m, m] or [m, k] if reduced) ${rt}$, intent(out), contiguous, target :: q(:, :) !> Upper triangular matrix R ([m, n] or [k, n] if reduced) ${rt}$, intent(out), contiguous, target :: r(:, :) !> Pivots. integer(ilp), intent(out) :: pivots(:) !> [optional] Can A data be overwritten and destroyed? logical(lk), optional, intent(in) :: overwrite_a !> [optional] Provide pre-allocated workspace, size to be checked with qr_space. ${rt}$, intent(out), optional, target :: storage(:) !> [optional] state return flag. On error if not requested, the code will stop. type(linalg_state_type), optional, intent(out) :: err !> Local variables. type(linalg_state_type) :: err0 integer(ilp) :: i, j, m, n, k, q1, q2, r1, r2, lda, lwork, info logical(lk) :: overwrite_a_, use_q_matrix, reduced ${rt}$ :: r11 ${rt}$, parameter :: zero = 0.0_${rk}$ ${rt}$, pointer :: amat(:, :), tau(:), work(:) #:if rt.startswith('complex') real(${rk}$) :: rwork(2*size(a, 2, kind=ilp)) #:endif !> Problem sizes. m = size(a, 1, kind=ilp) n = size(a, 2, kind=ilp) k = min(m, n) q1 = size(q, 1, kind=ilp) q2 = size(q, 2, kind=ilp) r1 = size(r, 1, kind=ilp) r2 = size(r, 2, kind=ilp) pivots = 0_ilp !> Full or thin QR factorization ? call check_problem_size(m, n, q1, q2, r1, r2, err0, reduced) if (err0%error()) then call linalg_error_handling(err0, err) return endif !> Can Q be used as temporary storage for A, ! to be destroyed by *GEQP3. use_q_matrix = q1 >= m .and. q2 >= n !> Can A be overwritten ? (By default, no). overwrite_a_ = .false._lk if (present(overwrite_a) .and. .not. use_q_matrix) overwrite_a_ = overwrite_a !> Initialize a temporary matrix or reuse available storage if possible. if (use_q_matrix) then amat(1:q1, 1:q2) => q q(1:m, 1:n) = a else if (overwrite_a_) then amat => a else allocate(amat(m, n), source=a) endif lda = size(amat, 1, kind=ilp) !> Store the elementary reflectors. if (.not. use_q_matrix) then ! Q is not being used as the storage matrix. tau(1:k) => q(1:k, 1) else ! R has unused contiguous storage in the 1st column, except for the ! r11 element. Use the full column and store it in a dummy variable. tau(1:k) => r(1:k, 1) endif ! Retrieve workspace size. call get_pivoting_qr_${ri}$_workspace(a, lwork, .true., err0) if (err0%ok()) then if (present(storage)) then work => storage else allocate(work(lwork)) endif if (.not. size(work, kind=ilp) >= lwork) then err0 = linalg_state_type(this, LINALG_ERROR, "insufficient workspace: should be at least ", lwork) call linalg_error_handling(err0, err) return endif ! Compute factorization. #:if rt.startswith('complex') call geqp3(m, n, amat, m, pivots, tau, work, lwork, rwork, info) #:else call geqp3(m, n, amat, m, pivots, tau, work, lwork, info) #:endif call handle_geqp3_info(this, info, m, n, lwork, err0) if (err0%ok()) then ! Get R matrix out before overwritten. ! Do not copy the first column at this stage: it may be used by `tau` r11 = amat(1, 1) do j = 2, r2 do i = 1, min(r1, m) r(i, j) = merge(amat(i, j), zero, i <= j) enddo enddo ! Convert K elementary reflectors tau(1:k) -> orthogonal matrix Q call #{if rt.startswith('complex')}#ungqr#{else}#orgqr#{endif}#(q1, q2, k, amat, lda, tau, work, lwork, info) call handle_orgqr_info(this, info, m, n, k, lwork, err0) ! Copy result back to Q if (.not.use_q_matrix) q = amat(1:q1, 1:q2) ! Copy first column of R r(1,1) = r11 r(2:r1,1) = zero ! Ensure last m-n rows of R are zeros, ! if full matrices were provided if (.not.reduced) r(k+1:m,1:n) = zero endif if (.not. present(storage)) deallocate(work) endif if (.not.(use_q_matrix.or.overwrite_a_)) deallocate(amat) ! Process output and return call linalg_error_handling(err0,err) end subroutine stdlib_linalg_${ri}$_pivoting_qr #:endfor end submodule stdlib_linalg_qr