aboutsummaryrefslogtreecommitdiff
path: root/SRC/dlaqr2.f
diff options
context:
space:
mode:
authorjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
committerjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
commitbaba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/dlaqr2.f
Move LAPACK trunk into position.
Diffstat (limited to 'SRC/dlaqr2.f')
-rw-r--r--SRC/dlaqr2.f551
1 files changed, 551 insertions, 0 deletions
diff --git a/SRC/dlaqr2.f b/SRC/dlaqr2.f
new file mode 100644
index 00000000..6ddb3309
--- /dev/null
+++ b/SRC/dlaqr2.f
@@ -0,0 +1,551 @@
+ SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
+ $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
+ $ LDT, NV, WV, LDWV, WORK, LWORK )
+*
+* -- LAPACK auxiliary routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* November 2006
+*
+* .. Scalar Arguments ..
+ INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
+ $ LDZ, LWORK, N, ND, NH, NS, NV, NW
+ LOGICAL WANTT, WANTZ
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
+ $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
+ $ Z( LDZ, * )
+* ..
+*
+* This subroutine is identical to DLAQR3 except that it avoids
+* recursion by calling DLAHQR instead of DLAQR4.
+*
+*
+* ******************************************************************
+* Aggressive early deflation:
+*
+* This subroutine accepts as input an upper Hessenberg matrix
+* H and performs an orthogonal similarity transformation
+* designed to detect and deflate fully converged eigenvalues from
+* a trailing principal submatrix. On output H has been over-
+* written by a new Hessenberg matrix that is a perturbation of
+* an orthogonal similarity transformation of H. It is to be
+* hoped that the final version of H has many zero subdiagonal
+* entries.
+*
+* ******************************************************************
+* WANTT (input) LOGICAL
+* If .TRUE., then the Hessenberg matrix H is fully updated
+* so that the quasi-triangular Schur factor may be
+* computed (in cooperation with the calling subroutine).
+* If .FALSE., then only enough of H is updated to preserve
+* the eigenvalues.
+*
+* WANTZ (input) LOGICAL
+* If .TRUE., then the orthogonal matrix Z is updated so
+* so that the orthogonal Schur factor may be computed
+* (in cooperation with the calling subroutine).
+* If .FALSE., then Z is not referenced.
+*
+* N (input) INTEGER
+* The order of the matrix H and (if WANTZ is .TRUE.) the
+* order of the orthogonal matrix Z.
+*
+* KTOP (input) INTEGER
+* It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
+* KBOT and KTOP together determine an isolated block
+* along the diagonal of the Hessenberg matrix.
+*
+* KBOT (input) INTEGER
+* It is assumed without a check that either
+* KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
+* determine an isolated block along the diagonal of the
+* Hessenberg matrix.
+*
+* NW (input) INTEGER
+* Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
+*
+* H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+* On input the initial N-by-N section of H stores the
+* Hessenberg matrix undergoing aggressive early deflation.
+* On output H has been transformed by an orthogonal
+* similarity transformation, perturbed, and the returned
+* to Hessenberg form that (it is to be hoped) has some
+* zero subdiagonal entries.
+*
+* LDH (input) integer
+* Leading dimension of H just as declared in the calling
+* subroutine. N .LE. LDH
+*
+* ILOZ (input) INTEGER
+* IHIZ (input) INTEGER
+* Specify the rows of Z to which transformations must be
+* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
+*
+* Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI)
+* IF WANTZ is .TRUE., then on output, the orthogonal
+* similarity transformation mentioned above has been
+* accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
+* If WANTZ is .FALSE., then Z is unreferenced.
+*
+* LDZ (input) integer
+* The leading dimension of Z just as declared in the
+* calling subroutine. 1 .LE. LDZ.
+*
+* NS (output) integer
+* The number of unconverged (ie approximate) eigenvalues
+* returned in SR and SI that may be used as shifts by the
+* calling subroutine.
+*
+* ND (output) integer
+* The number of converged eigenvalues uncovered by this
+* subroutine.
+*
+* SR (output) DOUBLE PRECISION array, dimension KBOT
+* SI (output) DOUBLE PRECISION array, dimension KBOT
+* On output, the real and imaginary parts of approximate
+* eigenvalues that may be used for shifts are stored in
+* SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
+* SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
+* The real and imaginary parts of converged eigenvalues
+* are stored in SR(KBOT-ND+1) through SR(KBOT) and
+* SI(KBOT-ND+1) through SI(KBOT), respectively.
+*
+* V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
+* An NW-by-NW work array.
+*
+* LDV (input) integer scalar
+* The leading dimension of V just as declared in the
+* calling subroutine. NW .LE. LDV
+*
+* NH (input) integer scalar
+* The number of columns of T. NH.GE.NW.
+*
+* T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
+*
+* LDT (input) integer
+* The leading dimension of T just as declared in the
+* calling subroutine. NW .LE. LDT
+*
+* NV (input) integer
+* The number of rows of work array WV available for
+* workspace. NV.GE.NW.
+*
+* WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
+*
+* LDWV (input) integer
+* The leading dimension of W just as declared in the
+* calling subroutine. NW .LE. LDV
+*
+* WORK (workspace) DOUBLE PRECISION array, dimension LWORK.
+* On exit, WORK(1) is set to an estimate of the optimal value
+* of LWORK for the given values of N, NW, KTOP and KBOT.
+*
+* LWORK (input) integer
+* The dimension of the work array WORK. LWORK = 2*NW
+* suffices, but greater efficiency may result from larger
+* values of LWORK.
+*
+* If LWORK = -1, then a workspace query is assumed; DLAQR2
+* only estimates the optimal workspace size for the given
+* values of N, NW, KTOP and KBOT. The estimate is returned
+* in WORK(1). No error message related to LWORK is issued
+* by XERBLA. Neither H nor Z are accessed.
+*
+* ================================================================
+* Based on contributions by
+* Karen Braman and Ralph Byers, Department of Mathematics,
+* University of Kansas, USA
+*
+* ================================================================
+* .. Parameters ..
+ DOUBLE PRECISION ZERO, ONE
+ PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
+* ..
+* .. Local Scalars ..
+ DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
+ $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
+ INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
+ $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
+ $ LWKOPT
+ LOGICAL BULGE, SORTED
+* ..
+* .. External Functions ..
+ DOUBLE PRECISION DLAMCH
+ EXTERNAL DLAMCH
+* ..
+* .. External Subroutines ..
+ EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
+ $ DLANV2, DLARF, DLARFG, DLASET, DORGHR, DTREXC
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
+* ..
+* .. Executable Statements ..
+*
+* ==== Estimate optimal workspace. ====
+*
+ JW = MIN( NW, KBOT-KTOP+1 )
+ IF( JW.LE.2 ) THEN
+ LWKOPT = 1
+ ELSE
+*
+* ==== Workspace query call to DGEHRD ====
+*
+ CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
+ LWK1 = INT( WORK( 1 ) )
+*
+* ==== Workspace query call to DORGHR ====
+*
+ CALL DORGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
+ LWK2 = INT( WORK( 1 ) )
+*
+* ==== Optimal workspace ====
+*
+ LWKOPT = JW + MAX( LWK1, LWK2 )
+ END IF
+*
+* ==== Quick return in case of workspace query. ====
+*
+ IF( LWORK.EQ.-1 ) THEN
+ WORK( 1 ) = DBLE( LWKOPT )
+ RETURN
+ END IF
+*
+* ==== Nothing to do ...
+* ... for an empty active block ... ====
+ NS = 0
+ ND = 0
+ IF( KTOP.GT.KBOT )
+ $ RETURN
+* ... nor for an empty deflation window. ====
+ IF( NW.LT.1 )
+ $ RETURN
+*
+* ==== Machine constants ====
+*
+ SAFMIN = DLAMCH( 'SAFE MINIMUM' )
+ SAFMAX = ONE / SAFMIN
+ CALL DLABAD( SAFMIN, SAFMAX )
+ ULP = DLAMCH( 'PRECISION' )
+ SMLNUM = SAFMIN*( DBLE( N ) / ULP )
+*
+* ==== Setup deflation window ====
+*
+ JW = MIN( NW, KBOT-KTOP+1 )
+ KWTOP = KBOT - JW + 1
+ IF( KWTOP.EQ.KTOP ) THEN
+ S = ZERO
+ ELSE
+ S = H( KWTOP, KWTOP-1 )
+ END IF
+*
+ IF( KBOT.EQ.KWTOP ) THEN
+*
+* ==== 1-by-1 deflation window: not much to do ====
+*
+ SR( KWTOP ) = H( KWTOP, KWTOP )
+ SI( KWTOP ) = ZERO
+ NS = 1
+ ND = 0
+ IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
+ $ THEN
+ NS = 0
+ ND = 1
+ IF( KWTOP.GT.KTOP )
+ $ H( KWTOP, KWTOP-1 ) = ZERO
+ END IF
+ RETURN
+ END IF
+*
+* ==== Convert to spike-triangular form. (In case of a
+* . rare QR failure, this routine continues to do
+* . aggressive early deflation using that part of
+* . the deflation window that converged using INFQR
+* . here and there to keep track.) ====
+*
+ CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
+ CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
+*
+ CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
+ CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
+ $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
+*
+* ==== DTREXC needs a clean margin near the diagonal ====
+*
+ DO 10 J = 1, JW - 3
+ T( J+2, J ) = ZERO
+ T( J+3, J ) = ZERO
+ 10 CONTINUE
+ IF( JW.GT.2 )
+ $ T( JW, JW-2 ) = ZERO
+*
+* ==== Deflation detection loop ====
+*
+ NS = JW
+ ILST = INFQR + 1
+ 20 CONTINUE
+ IF( ILST.LE.NS ) THEN
+ IF( NS.EQ.1 ) THEN
+ BULGE = .FALSE.
+ ELSE
+ BULGE = T( NS, NS-1 ).NE.ZERO
+ END IF
+*
+* ==== Small spike tip test for deflation ====
+*
+ IF( .NOT.BULGE ) THEN
+*
+* ==== Real eigenvalue ====
+*
+ FOO = ABS( T( NS, NS ) )
+ IF( FOO.EQ.ZERO )
+ $ FOO = ABS( S )
+ IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
+*
+* ==== Deflatable ====
+*
+ NS = NS - 1
+ ELSE
+*
+* ==== Undeflatable. Move it up out of the way.
+* . (DTREXC can not fail in this case.) ====
+*
+ IFST = NS
+ CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+ $ INFO )
+ ILST = ILST + 1
+ END IF
+ ELSE
+*
+* ==== Complex conjugate pair ====
+*
+ FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
+ $ SQRT( ABS( T( NS-1, NS ) ) )
+ IF( FOO.EQ.ZERO )
+ $ FOO = ABS( S )
+ IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
+ $ MAX( SMLNUM, ULP*FOO ) ) THEN
+*
+* ==== Deflatable ====
+*
+ NS = NS - 2
+ ELSE
+*
+* ==== Undflatable. Move them up out of the way.
+* . Fortunately, DTREXC does the right thing with
+* . ILST in case of a rare exchange failure. ====
+*
+ IFST = NS
+ CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+ $ INFO )
+ ILST = ILST + 2
+ END IF
+ END IF
+*
+* ==== End deflation detection loop ====
+*
+ GO TO 20
+ END IF
+*
+* ==== Return to Hessenberg form ====
+*
+ IF( NS.EQ.0 )
+ $ S = ZERO
+*
+ IF( NS.LT.JW ) THEN
+*
+* ==== sorting diagonal blocks of T improves accuracy for
+* . graded matrices. Bubble sort deals well with
+* . exchange failures. ====
+*
+ SORTED = .false.
+ I = NS + 1
+ 30 CONTINUE
+ IF( SORTED )
+ $ GO TO 50
+ SORTED = .true.
+*
+ KEND = I - 1
+ I = INFQR + 1
+ IF( I.EQ.NS ) THEN
+ K = I + 1
+ ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
+ K = I + 1
+ ELSE
+ K = I + 2
+ END IF
+ 40 CONTINUE
+ IF( K.LE.KEND ) THEN
+ IF( K.EQ.I+1 ) THEN
+ EVI = ABS( T( I, I ) )
+ ELSE
+ EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
+ $ SQRT( ABS( T( I, I+1 ) ) )
+ END IF
+*
+ IF( K.EQ.KEND ) THEN
+ EVK = ABS( T( K, K ) )
+ ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
+ EVK = ABS( T( K, K ) )
+ ELSE
+ EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
+ $ SQRT( ABS( T( K, K+1 ) ) )
+ END IF
+*
+ IF( EVI.GE.EVK ) THEN
+ I = K
+ ELSE
+ SORTED = .false.
+ IFST = I
+ ILST = K
+ CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+ $ INFO )
+ IF( INFO.EQ.0 ) THEN
+ I = ILST
+ ELSE
+ I = K
+ END IF
+ END IF
+ IF( I.EQ.KEND ) THEN
+ K = I + 1
+ ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
+ K = I + 1
+ ELSE
+ K = I + 2
+ END IF
+ GO TO 40
+ END IF
+ GO TO 30
+ 50 CONTINUE
+ END IF
+*
+* ==== Restore shift/eigenvalue array from T ====
+*
+ I = JW
+ 60 CONTINUE
+ IF( I.GE.INFQR+1 ) THEN
+ IF( I.EQ.INFQR+1 ) THEN
+ SR( KWTOP+I-1 ) = T( I, I )
+ SI( KWTOP+I-1 ) = ZERO
+ I = I - 1
+ ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
+ SR( KWTOP+I-1 ) = T( I, I )
+ SI( KWTOP+I-1 ) = ZERO
+ I = I - 1
+ ELSE
+ AA = T( I-1, I-1 )
+ CC = T( I, I-1 )
+ BB = T( I-1, I )
+ DD = T( I, I )
+ CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
+ $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
+ $ SI( KWTOP+I-1 ), CS, SN )
+ I = I - 2
+ END IF
+ GO TO 60
+ END IF
+*
+ IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
+ IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
+*
+* ==== Reflect spike back into lower triangle ====
+*
+ CALL DCOPY( NS, V, LDV, WORK, 1 )
+ BETA = WORK( 1 )
+ CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
+ WORK( 1 ) = ONE
+*
+ CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
+*
+ CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
+ $ WORK( JW+1 ) )
+ CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
+ $ WORK( JW+1 ) )
+ CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
+ $ WORK( JW+1 ) )
+*
+ CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
+ $ LWORK-JW, INFO )
+ END IF
+*
+* ==== Copy updated reduced window into place ====
+*
+ IF( KWTOP.GT.1 )
+ $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
+ CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
+ CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
+ $ LDH+1 )
+*
+* ==== Accumulate orthogonal matrix in order update
+* . H and Z, if requested. (A modified version
+* . of DORGHR that accumulates block Householder
+* . transformations into V directly might be
+* . marginally more efficient than the following.) ====
+*
+ IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
+ CALL DORGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
+ $ LWORK-JW, INFO )
+ CALL DGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO,
+ $ WV, LDWV )
+ CALL DLACPY( 'A', JW, NS, WV, LDWV, V, LDV )
+ END IF
+*
+* ==== Update vertical slab in H ====
+*
+ IF( WANTT ) THEN
+ LTOP = 1
+ ELSE
+ LTOP = KTOP
+ END IF
+ DO 70 KROW = LTOP, KWTOP - 1, NV
+ KLN = MIN( NV, KWTOP-KROW )
+ CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
+ $ LDH, V, LDV, ZERO, WV, LDWV )
+ CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
+ 70 CONTINUE
+*
+* ==== Update horizontal slab in H ====
+*
+ IF( WANTT ) THEN
+ DO 80 KCOL = KBOT + 1, N, NH
+ KLN = MIN( NH, N-KCOL+1 )
+ CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
+ $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
+ CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
+ $ LDH )
+ 80 CONTINUE
+ END IF
+*
+* ==== Update vertical slab in Z ====
+*
+ IF( WANTZ ) THEN
+ DO 90 KROW = ILOZ, IHIZ, NV
+ KLN = MIN( NV, IHIZ-KROW+1 )
+ CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
+ $ LDZ, V, LDV, ZERO, WV, LDWV )
+ CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
+ $ LDZ )
+ 90 CONTINUE
+ END IF
+ END IF
+*
+* ==== Return the number of deflations ... ====
+*
+ ND = JW - NS
+*
+* ==== ... and the number of shifts. (Subtracting
+* . INFQR from the spike length takes care
+* . of the case of a rare QR failure while
+* . calculating eigenvalues of the deflation
+* . window.) ====
+*
+ NS = NS - INFQR
+*
+* ==== Return optimal workspace. ====
+*
+ WORK( 1 ) = DBLE( LWKOPT )
+*
+* ==== End of DLAQR2 ====
+*
+ END