aboutsummaryrefslogtreecommitdiff
path: root/SRC/cgbtrf.f
diff options
context:
space:
mode:
authorjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
committerjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
commitbaba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/cgbtrf.f
Move LAPACK trunk into position.
Diffstat (limited to 'SRC/cgbtrf.f')
-rw-r--r--SRC/cgbtrf.f442
1 files changed, 442 insertions, 0 deletions
diff --git a/SRC/cgbtrf.f b/SRC/cgbtrf.f
new file mode 100644
index 00000000..88758b97
--- /dev/null
+++ b/SRC/cgbtrf.f
@@ -0,0 +1,442 @@
+ SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* November 2006
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, KL, KU, LDAB, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX AB( LDAB, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CGBTRF computes an LU factorization of a complex m-by-n band matrix A
+* using partial pivoting with row interchanges.
+*
+* This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* KL (input) INTEGER
+* The number of subdiagonals within the band of A. KL >= 0.
+*
+* KU (input) INTEGER
+* The number of superdiagonals within the band of A. KU >= 0.
+*
+* AB (input/output) COMPLEX array, dimension (LDAB,N)
+* On entry, the matrix A in band storage, in rows KL+1 to
+* 2*KL+KU+1; rows 1 to KL of the array need not be set.
+* The j-th column of A is stored in the j-th column of the
+* array AB as follows:
+* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+* On exit, details of the factorization: U is stored as an
+* upper triangular band matrix with KL+KU superdiagonals in
+* rows 1 to KL+KU+1, and the multipliers used during the
+* factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+* See below for further details.
+*
+* LDAB (input) INTEGER
+* The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* Further Details
+* ===============
+*
+* The band storage scheme is illustrated by the following example, when
+* M = N = 6, KL = 2, KU = 1:
+*
+* On entry: On exit:
+*
+* * * * + + + * * * u14 u25 u36
+* * * + + + + * * u13 u24 u35 u46
+* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
+* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
+* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
+* a31 a42 a53 a64 * * m31 m42 m53 m64 * *
+*
+* Array elements marked * are not used by the routine; elements marked
+* + need not be set on entry, but are required by the routine to store
+* elements of U because of fill-in resulting from the row interchanges.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX ONE, ZERO
+ PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
+ $ ZERO = ( 0.0E+0, 0.0E+0 ) )
+ INTEGER NBMAX, LDWORK
+ PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+ $ JU, K2, KM, KV, NB, NW
+ COMPLEX TEMP
+* ..
+* .. Local Arrays ..
+ COMPLEX WORK13( LDWORK, NBMAX ),
+ $ WORK31( LDWORK, NBMAX )
+* ..
+* .. External Functions ..
+ INTEGER ICAMAX, ILAENV
+ EXTERNAL ICAMAX, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
+ $ CSWAP, CTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* KV is the number of superdiagonals in the factor U, allowing for
+* fill-in
+*
+ KV = KU + KL
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( KL.LT.0 ) THEN
+ INFO = -3
+ ELSE IF( KU.LT.0 ) THEN
+ INFO = -4
+ ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+ INFO = -6
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CGBTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment
+*
+ NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
+*
+* The block size must not exceed the limit set by the size of the
+* local arrays WORK13 and WORK31.
+*
+ NB = MIN( NB, NBMAX )
+*
+ IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+* Use unblocked code
+*
+ CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+ ELSE
+*
+* Use blocked code
+*
+* Zero the superdiagonal elements of the work array WORK13
+*
+ DO 20 J = 1, NB
+ DO 10 I = 1, J - 1
+ WORK13( I, J ) = ZERO
+ 10 CONTINUE
+ 20 CONTINUE
+*
+* Zero the subdiagonal elements of the work array WORK31
+*
+ DO 40 J = 1, NB
+ DO 30 I = J + 1, NB
+ WORK31( I, J ) = ZERO
+ 30 CONTINUE
+ 40 CONTINUE
+*
+* Gaussian elimination with partial pivoting
+*
+* Set fill-in elements in columns KU+2 to KV to zero
+*
+ DO 60 J = KU + 2, MIN( KV, N )
+ DO 50 I = KV - J + 2, KL
+ AB( I, J ) = ZERO
+ 50 CONTINUE
+ 60 CONTINUE
+*
+* JU is the index of the last column affected by the current
+* stage of the factorization
+*
+ JU = 1
+*
+ DO 180 J = 1, MIN( M, N ), NB
+ JB = MIN( NB, MIN( M, N )-J+1 )
+*
+* The active part of the matrix is partitioned
+*
+* A11 A12 A13
+* A21 A22 A23
+* A31 A32 A33
+*
+* Here A11, A21 and A31 denote the current block of JB columns
+* which is about to be factorized. The number of rows in the
+* partitioning are JB, I2, I3 respectively, and the numbers
+* of columns are JB, J2, J3. The superdiagonal elements of A13
+* and the subdiagonal elements of A31 lie outside the band.
+*
+ I2 = MIN( KL-JB, M-J-JB+1 )
+ I3 = MIN( JB, M-J-KL+1 )
+*
+* J2 and J3 are computed after JU has been updated.
+*
+* Factorize the current block of JB columns
+*
+ DO 80 JJ = J, J + JB - 1
+*
+* Set fill-in elements in column JJ+KV to zero
+*
+ IF( JJ+KV.LE.N ) THEN
+ DO 70 I = 1, KL
+ AB( I, JJ+KV ) = ZERO
+ 70 CONTINUE
+ END IF
+*
+* Find pivot and test for singularity. KM is the number of
+* subdiagonal elements in the current column.
+*
+ KM = MIN( KL, M-JJ )
+ JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
+ IPIV( JJ ) = JP + JJ - J
+ IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+ JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+ IF( JP.NE.1 ) THEN
+*
+* Apply interchange to columns J to J+JB-1
+*
+ IF( JP+JJ-1.LT.J+KL ) THEN
+*
+ CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+ $ AB( KV+JP+JJ-J, J ), LDAB-1 )
+ ELSE
+*
+* The interchange affects columns J to JJ-1 of A31
+* which are stored in the work array WORK31
+*
+ CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+ $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+ CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+ $ AB( KV+JP, JJ ), LDAB-1 )
+ END IF
+ END IF
+*
+* Compute multipliers
+*
+ CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+ $ 1 )
+*
+* Update trailing submatrix within the band and within
+* the current block. JM is the index of the last column
+* which needs to be updated.
+*
+ JM = MIN( JU, J+JB-1 )
+ IF( JM.GT.JJ )
+ $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+ $ AB( KV, JJ+1 ), LDAB-1,
+ $ AB( KV+1, JJ+1 ), LDAB-1 )
+ ELSE
+*
+* If pivot is zero, set INFO to the index of the pivot
+* unless a zero pivot has already been found.
+*
+ IF( INFO.EQ.0 )
+ $ INFO = JJ
+ END IF
+*
+* Copy current column of A31 into the work array WORK31
+*
+ NW = MIN( JJ-J+1, I3 )
+ IF( NW.GT.0 )
+ $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+ $ WORK31( 1, JJ-J+1 ), 1 )
+ 80 CONTINUE
+ IF( J+JB.LE.N ) THEN
+*
+* Apply the row interchanges to the other blocks.
+*
+ J2 = MIN( JU-J+1, KV ) - JB
+ J3 = MAX( 0, JU-J-KV+1 )
+*
+* Use CLASWP to apply the row interchanges to A12, A22, and
+* A32.
+*
+ CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+ $ IPIV( J ), 1 )
+*
+* Adjust the pivot indices.
+*
+ DO 90 I = J, J + JB - 1
+ IPIV( I ) = IPIV( I ) + J - 1
+ 90 CONTINUE
+*
+* Apply the row interchanges to A13, A23, and A33
+* columnwise.
+*
+ K2 = J - 1 + JB + J2
+ DO 110 I = 1, J3
+ JJ = K2 + I
+ DO 100 II = J + I - 1, J + JB - 1
+ IP = IPIV( II )
+ IF( IP.NE.II ) THEN
+ TEMP = AB( KV+1+II-JJ, JJ )
+ AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+ AB( KV+1+IP-JJ, JJ ) = TEMP
+ END IF
+ 100 CONTINUE
+ 110 CONTINUE
+*
+* Update the relevant part of the trailing submatrix
+*
+ IF( J2.GT.0 ) THEN
+*
+* Update A12
+*
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+ $ AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+ IF( I2.GT.0 ) THEN
+*
+* Update A22
+*
+ CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
+ $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+ $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+ $ AB( KV+1, J+JB ), LDAB-1 )
+ END IF
+*
+ IF( I3.GT.0 ) THEN
+*
+* Update A32
+*
+ CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
+ $ JB, -ONE, WORK31, LDWORK,
+ $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+ $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+ END IF
+ END IF
+*
+ IF( J3.GT.0 ) THEN
+*
+* Copy the lower triangle of A13 into the work array
+* WORK13
+*
+ DO 130 JJ = 1, J3
+ DO 120 II = JJ, JB
+ WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+ 120 CONTINUE
+ 130 CONTINUE
+*
+* Update A13 in the work array
+*
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+ $ WORK13, LDWORK )
+*
+ IF( I2.GT.0 ) THEN
+*
+* Update A23
+*
+ CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
+ $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+ $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+ $ LDAB-1 )
+ END IF
+*
+ IF( I3.GT.0 ) THEN
+*
+* Update A33
+*
+ CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
+ $ JB, -ONE, WORK31, LDWORK, WORK13,
+ $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+ END IF
+*
+* Copy the lower triangle of A13 back into place
+*
+ DO 150 JJ = 1, J3
+ DO 140 II = JJ, JB
+ AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+ 140 CONTINUE
+ 150 CONTINUE
+ END IF
+ ELSE
+*
+* Adjust the pivot indices.
+*
+ DO 160 I = J, J + JB - 1
+ IPIV( I ) = IPIV( I ) + J - 1
+ 160 CONTINUE
+ END IF
+*
+* Partially undo the interchanges in the current block to
+* restore the upper triangular form of A31 and copy the upper
+* triangle of A31 back into place
+*
+ DO 170 JJ = J + JB - 1, J, -1
+ JP = IPIV( JJ ) - JJ + 1
+ IF( JP.NE.1 ) THEN
+*
+* Apply interchange to columns J to JJ-1
+*
+ IF( JP+JJ-1.LT.J+KL ) THEN
+*
+* The interchange does not affect A31
+*
+ CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+ $ AB( KV+JP+JJ-J, J ), LDAB-1 )
+ ELSE
+*
+* The interchange does affect A31
+*
+ CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+ $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+ END IF
+ END IF
+*
+* Copy the current column of A31 back into place
+*
+ NW = MIN( I3, JJ-J+1 )
+ IF( NW.GT.0 )
+ $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+ $ AB( KV+KL+1-JJ+J, JJ ), 1 )
+ 170 CONTINUE
+ 180 CONTINUE
+ END IF
+*
+ RETURN
+*
+* End of CGBTRF
+*
+ END