Move LAPACK trunk into position.

author: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
committer: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
commit: baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree: 8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/cgbtrf.f
1 files changed, 442 insertions, 0 deletions
diff --git a/SRC/cgbtrf.f b/SRC/cgbtrf.f
new file mode 100644
index 00000000..88758b97
--- /dev/null
+++ b/SRC/cgbtrf.f
@@ -0,0 +1,442 @@
+      SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBTRF computes an LU factorization of a complex m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+     $                   JU, K2, KM, KV, NB, NW
+      COMPLEX            TEMP
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            WORK13( LDWORK, NBMAX ),
+     $                   WORK31( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX, ILAENV
+      EXTERNAL           ICAMAX, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
+     $                   CSWAP, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
+*
+*     The block size must not exceed the limit set by the size of the
+*     local arrays WORK13 and WORK31.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+*        Use unblocked code
+*
+         CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+*        Zero the superdiagonal elements of the work array WORK13
+*
+         DO 20 J = 1, NB
+            DO 10 I = 1, J - 1
+               WORK13( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Zero the subdiagonal elements of the work array WORK31
+*
+         DO 40 J = 1, NB
+            DO 30 I = J + 1, NB
+               WORK31( I, J ) = ZERO
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Gaussian elimination with partial pivoting
+*
+*        Set fill-in elements in columns KU+2 to KV to zero
+*
+         DO 60 J = KU + 2, MIN( KV, N )
+            DO 50 I = KV - J + 2, KL
+               AB( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+*        JU is the index of the last column affected by the current
+*        stage of the factorization
+*
+         JU = 1
+*
+         DO 180 J = 1, MIN( M, N ), NB
+            JB = MIN( NB, MIN( M, N )-J+1 )
+*
+*           The active part of the matrix is partitioned
+*
+*              A11   A12   A13
+*              A21   A22   A23
+*              A31   A32   A33
+*
+*           Here A11, A21 and A31 denote the current block of JB columns
+*           which is about to be factorized. The number of rows in the
+*           partitioning are JB, I2, I3 respectively, and the numbers
+*           of columns are JB, J2, J3. The superdiagonal elements of A13
+*           and the subdiagonal elements of A31 lie outside the band.
+*
+            I2 = MIN( KL-JB, M-J-JB+1 )
+            I3 = MIN( JB, M-J-KL+1 )
+*
+*           J2 and J3 are computed after JU has been updated.
+*
+*           Factorize the current block of JB columns
+*
+            DO 80 JJ = J, J + JB - 1
+*
+*              Set fill-in elements in column JJ+KV to zero
+*
+               IF( JJ+KV.LE.N ) THEN
+                  DO 70 I = 1, KL
+                     AB( I, JJ+KV ) = ZERO
+   70             CONTINUE
+               END IF
+*
+*              Find pivot and test for singularity. KM is the number of
+*              subdiagonal elements in the current column.
+*
+               KM = MIN( KL, M-JJ )
+               JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
+               IPIV( JJ ) = JP + JJ - J
+               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+                  IF( JP.NE.1 ) THEN
+*
+*                    Apply interchange to columns J to J+JB-1
+*
+                     IF( JP+JJ-1.LT.J+KL ) THEN
+*
+                        CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
+                     ELSE
+*
+*                       The interchange affects columns J to JJ-1 of A31
+*                       which are stored in the work array WORK31
+*
+                        CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                        CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+     $                              AB( KV+JP, JJ ), LDAB-1 )
+                     END IF
+                  END IF
+*
+*                 Compute multipliers
+*
+                  CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+     $                        1 )
+*
+*                 Update trailing submatrix within the band and within
+*                 the current block. JM is the index of the last column
+*                 which needs to be updated.
+*
+                  JM = MIN( JU, J+JB-1 )
+                  IF( JM.GT.JJ )
+     $               CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+     $                           AB( KV, JJ+1 ), LDAB-1,
+     $                           AB( KV+1, JJ+1 ), LDAB-1 )
+               ELSE
+*
+*                 If pivot is zero, set INFO to the index of the pivot
+*                 unless a zero pivot has already been found.
+*
+                  IF( INFO.EQ.0 )
+     $               INFO = JJ
+               END IF
+*
+*              Copy current column of A31 into the work array WORK31
+*
+               NW = MIN( JJ-J+1, I3 )
+               IF( NW.GT.0 )
+     $            CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+     $                        WORK31( 1, JJ-J+1 ), 1 )
+   80       CONTINUE
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply the row interchanges to the other blocks.
+*
+               J2 = MIN( JU-J+1, KV ) - JB
+               J3 = MAX( 0, JU-J-KV+1 )
+*
+*              Use CLASWP to apply the row interchanges to A12, A22, and
+*              A32.
+*
+               CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+     $                      IPIV( J ), 1 )
+*
+*              Adjust the pivot indices.
+*
+               DO 90 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+   90          CONTINUE
+*
+*              Apply the row interchanges to A13, A23, and A33
+*              columnwise.
+*
+               K2 = J - 1 + JB + J2
+               DO 110 I = 1, J3
+                  JJ = K2 + I
+                  DO 100 II = J + I - 1, J + JB - 1
+                     IP = IPIV( II )
+                     IF( IP.NE.II ) THEN
+                        TEMP = AB( KV+1+II-JJ, JJ )
+                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+                        AB( KV+1+IP-JJ, JJ ) = TEMP
+                     END IF
+  100             CONTINUE
+  110          CONTINUE
+*
+*              Update the relevant part of the trailing submatrix
+*
+               IF( J2.GT.0 ) THEN
+*
+*                 Update A12
+*
+                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A22
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+1, J+JB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A32
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
+     $                           JB, -ONE, WORK31, LDWORK,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+                  END IF
+               END IF
+*
+               IF( J3.GT.0 ) THEN
+*
+*                 Copy the lower triangle of A13 into the work array
+*                 WORK13
+*
+                  DO 130 JJ = 1, J3
+                     DO 120 II = JJ, JB
+                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+  120                CONTINUE
+  130             CONTINUE
+*
+*                 Update A13 in the work array
+*
+                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+     $                        WORK13, LDWORK )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A23
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+     $                           LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A33
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
+     $                           JB, -ONE, WORK31, LDWORK, WORK13,
+     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+                  END IF
+*
+*                 Copy the lower triangle of A13 back into place
+*
+                  DO 150 JJ = 1, J3
+                     DO 140 II = JJ, JB
+                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+  140                CONTINUE
+  150             CONTINUE
+               END IF
+            ELSE
+*
+*              Adjust the pivot indices.
+*
+               DO 160 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+  160          CONTINUE
+            END IF
+*
+*           Partially undo the interchanges in the current block to
+*           restore the upper triangular form of A31 and copy the upper
+*           triangle of A31 back into place
+*
+            DO 170 JJ = J + JB - 1, J, -1
+               JP = IPIV( JJ ) - JJ + 1
+               IF( JP.NE.1 ) THEN
+*
+*                 Apply interchange to columns J to JJ-1
+*
+                  IF( JP+JJ-1.LT.J+KL ) THEN
+*
+*                    The interchange does not affect A31
+*
+                     CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
+                  ELSE
+*
+*                    The interchange does affect A31
+*
+                     CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                  END IF
+               END IF
+*
+*              Copy the current column of A31 back into place
+*
+               NW = MIN( I3, JJ-J+1 )
+               IF( NW.GT.0 )
+     $            CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
+  170       CONTINUE
+  180    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGBTRF
+*
+      END
author	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
committer	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
commit	baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree	8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/cgbtrf.f