Move LAPACK trunk into position.

author: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
committer: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
commit: baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree: 8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/dlaqr2.f
1 files changed, 551 insertions, 0 deletions
diff --git a/SRC/dlaqr2.f b/SRC/dlaqr2.f
new file mode 100644
index 00000000..6ddb3309
--- /dev/null
+++ b/SRC/dlaqr2.f
@@ -0,0 +1,551 @@
+      SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
+     $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
+     $                   LDT, NV, WV, LDWV, WORK, LWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
+     $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
+      LOGICAL            WANTT, WANTZ
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
+     $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*     This subroutine is identical to DLAQR3 except that it avoids
+*     recursion by calling DLAHQR instead of DLAQR4.
+*
+*
+*     ******************************************************************
+*     Aggressive early deflation:
+*
+*     This subroutine accepts as input an upper Hessenberg matrix
+*     H and performs an orthogonal similarity transformation
+*     designed to detect and deflate fully converged eigenvalues from
+*     a trailing principal submatrix.  On output H has been over-
+*     written by a new Hessenberg matrix that is a perturbation of
+*     an orthogonal similarity transformation of H.  It is to be
+*     hoped that the final version of H has many zero subdiagonal
+*     entries.
+*
+*     ******************************************************************
+*     WANTT   (input) LOGICAL
+*          If .TRUE., then the Hessenberg matrix H is fully updated
+*          so that the quasi-triangular Schur factor may be
+*          computed (in cooperation with the calling subroutine).
+*          If .FALSE., then only enough of H is updated to preserve
+*          the eigenvalues.
+*
+*     WANTZ   (input) LOGICAL
+*          If .TRUE., then the orthogonal matrix Z is updated so
+*          so that the orthogonal Schur factor may be computed
+*          (in cooperation with the calling subroutine).
+*          If .FALSE., then Z is not referenced.
+*
+*     N       (input) INTEGER
+*          The order of the matrix H and (if WANTZ is .TRUE.) the
+*          order of the orthogonal matrix Z.
+*
+*     KTOP    (input) INTEGER
+*          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
+*          KBOT and KTOP together determine an isolated block
+*          along the diagonal of the Hessenberg matrix.
+*
+*     KBOT    (input) INTEGER
+*          It is assumed without a check that either
+*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
+*          determine an isolated block along the diagonal of the
+*          Hessenberg matrix.
+*
+*     NW      (input) INTEGER
+*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
+*
+*     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+*          On input the initial N-by-N section of H stores the
+*          Hessenberg matrix undergoing aggressive early deflation.
+*          On output H has been transformed by an orthogonal
+*          similarity transformation, perturbed, and the returned
+*          to Hessenberg form that (it is to be hoped) has some
+*          zero subdiagonal entries.
+*
+*     LDH     (input) integer
+*          Leading dimension of H just as declared in the calling
+*          subroutine.  N .LE. LDH
+*
+*     ILOZ    (input) INTEGER
+*     IHIZ    (input) INTEGER
+*          Specify the rows of Z to which transformations must be
+*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
+*
+*     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI)
+*          IF WANTZ is .TRUE., then on output, the orthogonal
+*          similarity transformation mentioned above has been
+*          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
+*          If WANTZ is .FALSE., then Z is unreferenced.
+*
+*     LDZ     (input) integer
+*          The leading dimension of Z just as declared in the
+*          calling subroutine.  1 .LE. LDZ.
+*
+*     NS      (output) integer
+*          The number of unconverged (ie approximate) eigenvalues
+*          returned in SR and SI that may be used as shifts by the
+*          calling subroutine.
+*
+*     ND      (output) integer
+*          The number of converged eigenvalues uncovered by this
+*          subroutine.
+*
+*     SR      (output) DOUBLE PRECISION array, dimension KBOT
+*     SI      (output) DOUBLE PRECISION array, dimension KBOT
+*          On output, the real and imaginary parts of approximate
+*          eigenvalues that may be used for shifts are stored in
+*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
+*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
+*          The real and imaginary parts of converged eigenvalues
+*          are stored in SR(KBOT-ND+1) through SR(KBOT) and
+*          SI(KBOT-ND+1) through SI(KBOT), respectively.
+*
+*     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
+*          An NW-by-NW work array.
+*
+*     LDV     (input) integer scalar
+*          The leading dimension of V just as declared in the
+*          calling subroutine.  NW .LE. LDV
+*
+*     NH      (input) integer scalar
+*          The number of columns of T.  NH.GE.NW.
+*
+*     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
+*
+*     LDT     (input) integer
+*          The leading dimension of T just as declared in the
+*          calling subroutine.  NW .LE. LDT
+*
+*     NV      (input) integer
+*          The number of rows of work array WV available for
+*          workspace.  NV.GE.NW.
+*
+*     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
+*
+*     LDWV    (input) integer
+*          The leading dimension of W just as declared in the
+*          calling subroutine.  NW .LE. LDV
+*
+*     WORK    (workspace) DOUBLE PRECISION array, dimension LWORK.
+*          On exit, WORK(1) is set to an estimate of the optimal value
+*          of LWORK for the given values of N, NW, KTOP and KBOT.
+*
+*     LWORK   (input) integer
+*          The dimension of the work array WORK.  LWORK = 2*NW
+*          suffices, but greater efficiency may result from larger
+*          values of LWORK.
+*
+*          If LWORK = -1, then a workspace query is assumed; DLAQR2
+*          only estimates the optimal workspace size for the given
+*          values of N, NW, KTOP and KBOT.  The estimate is returned
+*          in WORK(1).  No error message related to LWORK is issued
+*          by XERBLA.  Neither H nor Z are accessed.
+*
+*     ================================================================
+*     Based on contributions by
+*        Karen Braman and Ralph Byers, Department of Mathematics,
+*        University of Kansas, USA
+*
+*     ================================================================
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
+     $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
+      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
+     $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
+     $                   LWKOPT
+      LOGICAL            BULGE, SORTED
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
+     $                   DLANV2, DLARF, DLARFG, DLASET, DORGHR, DTREXC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     ==== Estimate optimal workspace. ====
+*
+      JW = MIN( NW, KBOT-KTOP+1 )
+      IF( JW.LE.2 ) THEN
+         LWKOPT = 1
+      ELSE
+*
+*        ==== Workspace query call to DGEHRD ====
+*
+         CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
+         LWK1 = INT( WORK( 1 ) )
+*
+*        ==== Workspace query call to DORGHR ====
+*
+         CALL DORGHR( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
+         LWK2 = INT( WORK( 1 ) )
+*
+*        ==== Optimal workspace ====
+*
+         LWKOPT = JW + MAX( LWK1, LWK2 )
+      END IF
+*
+*     ==== Quick return in case of workspace query. ====
+*
+      IF( LWORK.EQ.-1 ) THEN
+         WORK( 1 ) = DBLE( LWKOPT )
+         RETURN
+      END IF
+*
+*     ==== Nothing to do ...
+*     ... for an empty active block ... ====
+      NS = 0
+      ND = 0
+      IF( KTOP.GT.KBOT )
+     $   RETURN
+*     ... nor for an empty deflation window. ====
+      IF( NW.LT.1 )
+     $   RETURN
+*
+*     ==== Machine constants ====
+*
+      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
+      SAFMAX = ONE / SAFMIN
+      CALL DLABAD( SAFMIN, SAFMAX )
+      ULP = DLAMCH( 'PRECISION' )
+      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
+*
+*     ==== Setup deflation window ====
+*
+      JW = MIN( NW, KBOT-KTOP+1 )
+      KWTOP = KBOT - JW + 1
+      IF( KWTOP.EQ.KTOP ) THEN
+         S = ZERO
+      ELSE
+         S = H( KWTOP, KWTOP-1 )
+      END IF
+*
+      IF( KBOT.EQ.KWTOP ) THEN
+*
+*        ==== 1-by-1 deflation window: not much to do ====
+*
+         SR( KWTOP ) = H( KWTOP, KWTOP )
+         SI( KWTOP ) = ZERO
+         NS = 1
+         ND = 0
+         IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
+     $        THEN
+            NS = 0
+            ND = 1
+            IF( KWTOP.GT.KTOP )
+     $         H( KWTOP, KWTOP-1 ) = ZERO
+         END IF
+         RETURN
+      END IF
+*
+*     ==== Convert to spike-triangular form.  (In case of a
+*     .    rare QR failure, this routine continues to do
+*     .    aggressive early deflation using that part of
+*     .    the deflation window that converged using INFQR
+*     .    here and there to keep track.) ====
+*
+      CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
+      CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
+*
+      CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
+      CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
+     $             SI( KWTOP ), 1, JW, V, LDV, INFQR )
+*
+*     ==== DTREXC needs a clean margin near the diagonal ====
+*
+      DO 10 J = 1, JW - 3
+         T( J+2, J ) = ZERO
+         T( J+3, J ) = ZERO
+   10 CONTINUE
+      IF( JW.GT.2 )
+     $   T( JW, JW-2 ) = ZERO
+*
+*     ==== Deflation detection loop ====
+*
+      NS = JW
+      ILST = INFQR + 1
+   20 CONTINUE
+      IF( ILST.LE.NS ) THEN
+         IF( NS.EQ.1 ) THEN
+            BULGE = .FALSE.
+         ELSE
+            BULGE = T( NS, NS-1 ).NE.ZERO
+         END IF
+*
+*        ==== Small spike tip test for deflation ====
+*
+         IF( .NOT.BULGE ) THEN
+*
+*           ==== Real eigenvalue ====
+*
+            FOO = ABS( T( NS, NS ) )
+            IF( FOO.EQ.ZERO )
+     $         FOO = ABS( S )
+            IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
+*
+*              ==== Deflatable ====
+*
+               NS = NS - 1
+            ELSE
+*
+*              ==== Undeflatable.   Move it up out of the way.
+*              .    (DTREXC can not fail in this case.) ====
+*
+               IFST = NS
+               CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+     $                      INFO )
+               ILST = ILST + 1
+            END IF
+         ELSE
+*
+*           ==== Complex conjugate pair ====
+*
+            FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
+     $            SQRT( ABS( T( NS-1, NS ) ) )
+            IF( FOO.EQ.ZERO )
+     $         FOO = ABS( S )
+            IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
+     $          MAX( SMLNUM, ULP*FOO ) ) THEN
+*
+*              ==== Deflatable ====
+*
+               NS = NS - 2
+            ELSE
+*
+*              ==== Undflatable. Move them up out of the way.
+*              .    Fortunately, DTREXC does the right thing with
+*              .    ILST in case of a rare exchange failure. ====
+*
+               IFST = NS
+               CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+     $                      INFO )
+               ILST = ILST + 2
+            END IF
+         END IF
+*
+*        ==== End deflation detection loop ====
+*
+         GO TO 20
+      END IF
+*
+*        ==== Return to Hessenberg form ====
+*
+      IF( NS.EQ.0 )
+     $   S = ZERO
+*
+      IF( NS.LT.JW ) THEN
+*
+*        ==== sorting diagonal blocks of T improves accuracy for
+*        .    graded matrices.  Bubble sort deals well with
+*        .    exchange failures. ====
+*
+         SORTED = .false.
+         I = NS + 1
+   30    CONTINUE
+         IF( SORTED )
+     $      GO TO 50
+         SORTED = .true.
+*
+         KEND = I - 1
+         I = INFQR + 1
+         IF( I.EQ.NS ) THEN
+            K = I + 1
+         ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
+            K = I + 1
+         ELSE
+            K = I + 2
+         END IF
+   40    CONTINUE
+         IF( K.LE.KEND ) THEN
+            IF( K.EQ.I+1 ) THEN
+               EVI = ABS( T( I, I ) )
+            ELSE
+               EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
+     $               SQRT( ABS( T( I, I+1 ) ) )
+            END IF
+*
+            IF( K.EQ.KEND ) THEN
+               EVK = ABS( T( K, K ) )
+            ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
+               EVK = ABS( T( K, K ) )
+            ELSE
+               EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
+     $               SQRT( ABS( T( K, K+1 ) ) )
+            END IF
+*
+            IF( EVI.GE.EVK ) THEN
+               I = K
+            ELSE
+               SORTED = .false.
+               IFST = I
+               ILST = K
+               CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
+     $                      INFO )
+               IF( INFO.EQ.0 ) THEN
+                  I = ILST
+               ELSE
+                  I = K
+               END IF
+            END IF
+            IF( I.EQ.KEND ) THEN
+               K = I + 1
+            ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
+               K = I + 1
+            ELSE
+               K = I + 2
+            END IF
+            GO TO 40
+         END IF
+         GO TO 30
+   50    CONTINUE
+      END IF
+*
+*     ==== Restore shift/eigenvalue array from T ====
+*
+      I = JW
+   60 CONTINUE
+      IF( I.GE.INFQR+1 ) THEN
+         IF( I.EQ.INFQR+1 ) THEN
+            SR( KWTOP+I-1 ) = T( I, I )
+            SI( KWTOP+I-1 ) = ZERO
+            I = I - 1
+         ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
+            SR( KWTOP+I-1 ) = T( I, I )
+            SI( KWTOP+I-1 ) = ZERO
+            I = I - 1
+         ELSE
+            AA = T( I-1, I-1 )
+            CC = T( I, I-1 )
+            BB = T( I-1, I )
+            DD = T( I, I )
+            CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
+     $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
+     $                   SI( KWTOP+I-1 ), CS, SN )
+            I = I - 2
+         END IF
+         GO TO 60
+      END IF
+*
+      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
+         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
+*
+*           ==== Reflect spike back into lower triangle ====
+*
+            CALL DCOPY( NS, V, LDV, WORK, 1 )
+            BETA = WORK( 1 )
+            CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
+            WORK( 1 ) = ONE
+*
+            CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
+*
+            CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
+     $                  WORK( JW+1 ) )
+            CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
+     $                  WORK( JW+1 ) )
+            CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
+     $                  WORK( JW+1 ) )
+*
+            CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
+     $                   LWORK-JW, INFO )
+         END IF
+*
+*        ==== Copy updated reduced window into place ====
+*
+         IF( KWTOP.GT.1 )
+     $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
+         CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
+         CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
+     $               LDH+1 )
+*
+*        ==== Accumulate orthogonal matrix in order update
+*        .    H and Z, if requested.  (A modified version
+*        .    of  DORGHR that accumulates block Householder
+*        .    transformations into V directly might be
+*        .    marginally more efficient than the following.) ====
+*
+         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
+            CALL DORGHR( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
+     $                   LWORK-JW, INFO )
+            CALL DGEMM( 'N', 'N', JW, NS, NS, ONE, V, LDV, T, LDT, ZERO,
+     $                  WV, LDWV )
+            CALL DLACPY( 'A', JW, NS, WV, LDWV, V, LDV )
+         END IF
+*
+*        ==== Update vertical slab in H ====
+*
+         IF( WANTT ) THEN
+            LTOP = 1
+         ELSE
+            LTOP = KTOP
+         END IF
+         DO 70 KROW = LTOP, KWTOP - 1, NV
+            KLN = MIN( NV, KWTOP-KROW )
+            CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
+     $                  LDH, V, LDV, ZERO, WV, LDWV )
+            CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
+   70    CONTINUE
+*
+*        ==== Update horizontal slab in H ====
+*
+         IF( WANTT ) THEN
+            DO 80 KCOL = KBOT + 1, N, NH
+               KLN = MIN( NH, N-KCOL+1 )
+               CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
+     $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
+               CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
+     $                      LDH )
+   80       CONTINUE
+         END IF
+*
+*        ==== Update vertical slab in Z ====
+*
+         IF( WANTZ ) THEN
+            DO 90 KROW = ILOZ, IHIZ, NV
+               KLN = MIN( NV, IHIZ-KROW+1 )
+               CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
+     $                     LDZ, V, LDV, ZERO, WV, LDWV )
+               CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
+     $                      LDZ )
+   90       CONTINUE
+         END IF
+      END IF
+*
+*     ==== Return the number of deflations ... ====
+*
+      ND = JW - NS
+*
+*     ==== ... and the number of shifts. (Subtracting
+*     .    INFQR from the spike length takes care
+*     .    of the case of a rare QR failure while
+*     .    calculating eigenvalues of the deflation
+*     .    window.)  ====
+*
+      NS = NS - INFQR
+*
+*      ==== Return optimal workspace. ====
+*
+      WORK( 1 ) = DBLE( LWKOPT )
+*
+*     ==== End of DLAQR2 ====
+*
+      END
author	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
committer	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
commit	baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree	8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/dlaqr2.f