aboutsummaryrefslogtreecommitdiff
path: root/SRC/VARIANTS
diff options
context:
space:
mode:
authorjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
committerjason <jason@8a072113-8704-0410-8d35-dd094bca7971>2008-10-28 01:38:50 +0000
commitbaba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/VARIANTS
Move LAPACK trunk into position.
Diffstat (limited to 'SRC/VARIANTS')
-rw-r--r--SRC/VARIANTS/Makefile67
-rw-r--r--SRC/VARIANTS/README84
-rw-r--r--SRC/VARIANTS/cholesky/RL/cpotrf.f187
-rw-r--r--SRC/VARIANTS/cholesky/RL/dpotrf.f186
-rw-r--r--SRC/VARIANTS/cholesky/RL/spotrf.f186
-rw-r--r--SRC/VARIANTS/cholesky/RL/zpotrf.f187
-rw-r--r--SRC/VARIANTS/cholesky/TOP/cpotrf.f181
-rw-r--r--SRC/VARIANTS/cholesky/TOP/dpotrf.f182
-rw-r--r--SRC/VARIANTS/cholesky/TOP/spotrf.f181
-rw-r--r--SRC/VARIANTS/cholesky/TOP/zpotrf.f181
-rw-r--r--SRC/VARIANTS/lu/CR/cgetrf.f165
-rw-r--r--SRC/VARIANTS/lu/CR/dgetrf.f165
-rw-r--r--SRC/VARIANTS/lu/CR/sgetrf.f165
-rw-r--r--SRC/VARIANTS/lu/CR/zgetrf.f165
-rw-r--r--SRC/VARIANTS/lu/LL/cgetrf.f190
-rw-r--r--SRC/VARIANTS/lu/LL/dgetrf.f189
-rw-r--r--SRC/VARIANTS/lu/LL/sgetrf.f190
-rw-r--r--SRC/VARIANTS/lu/LL/zgetrf.f190
-rw-r--r--SRC/VARIANTS/lu/REC/cgetrf.f224
-rw-r--r--SRC/VARIANTS/lu/REC/dgetrf.f220
-rw-r--r--SRC/VARIANTS/lu/REC/sgetrf.f220
-rw-r--r--SRC/VARIANTS/lu/REC/zgetrf.f224
-rw-r--r--SRC/VARIANTS/qr/LL/cgeqrf.f343
-rw-r--r--SRC/VARIANTS/qr/LL/dgeqrf.f344
-rw-r--r--SRC/VARIANTS/qr/LL/sceil.f28
-rw-r--r--SRC/VARIANTS/qr/LL/sgeqrf.f343
-rw-r--r--SRC/VARIANTS/qr/LL/zgeqrf.f343
27 files changed, 5330 insertions, 0 deletions
diff --git a/SRC/VARIANTS/Makefile b/SRC/VARIANTS/Makefile
new file mode 100644
index 00000000..42446eb5
--- /dev/null
+++ b/SRC/VARIANTS/Makefile
@@ -0,0 +1,67 @@
+include ../../make.inc
+
+#######################################################################
+# This is the makefile to create a the variants libraries for LAPACK.
+# The files are organized as follows:
+# CHOLRL -- Right looking block version of the algorithm, calling Level 3 BLAS
+# CHOLTOP -- Top looking block version of the algorithm, calling Level 3 BLAS
+# LUCR -- Crout Level 3 BLAS version of LU factorization
+# LULL -- left-looking Level 3 BLAS version of LU factorization
+# QRLL -- left-looking Level 3 BLAS version of QR factorization
+# LUREC -- an iterative version of Sivan Toledo's recursive LU algorithm[1].
+# For square matrices, this iterative versions should
+# be within a factor of two of the optimum number of memory transfers.
+#
+# [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+# Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+# 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+#######################################################################
+
+VARIANTSDIR=LIB
+
+CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o
+
+CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o
+
+LUCR = lu/CR/cgetrf.o lu/CR/dgetrf.o lu/CR/sgetrf.o lu/CR/zgetrf.o
+
+LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o
+
+LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
+
+QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
+
+
+all: cholrl choltop lucr lull lurec qrll
+
+cholrl: $(CHOLRL)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL)
+ $(RANLIB) $(VARIANTSDIR)/cholrl.a
+
+choltop: $(CHOLTOP)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP)
+ $(RANLIB) $(VARIANTSDIR)/choltop.a
+
+lucr: $(LUCR)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR)
+ $(RANLIB) $(VARIANTSDIR)/lucr.a
+
+lull: $(LULL)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL)
+ $(RANLIB) $(VARIANTSDIR)/lull.a
+
+lurec: $(LUREC)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC)
+ $(RANLIB) $(VARIANTSDIR)/lurec.a
+
+qrll: $(QRLL)
+ $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL)
+ $(RANLIB) $(VARIANTSDIR)/qrll.a
+
+
+.f.o:
+ $(FORTRAN) $(OPTS) -c $< -o $@
+
+clean:
+ rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) \
+ $(VARIANTSDIR)/*.a \ No newline at end of file
diff --git a/SRC/VARIANTS/README b/SRC/VARIANTS/README
new file mode 100644
index 00000000..6b4f3258
--- /dev/null
+++ b/SRC/VARIANTS/README
@@ -0,0 +1,84 @@
+ ===============
+ = README File =
+ ===============
+
+This README File is for the LAPACK driver variants.
+It is composed of 5 sections:
+ - Description: contents a quick description of each of the variants. For a more detailed description please refer to LAWN XXX.
+ - Build
+ - Testing
+ - Linking your program
+ - Support
+
+Author: Julie LANGOU, May 2008
+
+===============
+= DESCRIPTION =
+===============
+
+This directory contains several variants of LAPACK routines in single/double/complex/double complex precision:
+ - [sdcz]getrf with LU Crout Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/CR
+ - [sdcz]getrf with LU Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/LL
+ - [sdcz]getrf with Sivan Toledo's recursive LU algorithm [1] - Directory: SRC/VARIANTS/lu/REC
+ - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
+ - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
+ - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
+
+References:For a more detailed description please refer to
+ - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+ 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+ - [2]LAWN XXX
+
+=========
+= BUILD =
+=========
+
+These variants are compiled by default in the build process but they are not tested by default.
+The build process creates one new library per variants in the four arithmetics (singel/double/comple/double complex).
+The libraries are in the SRC/VARIANTS/LIB directory.
+
+Corresponding libraries created in SRC/VARIANTS/LIB:
+ - LU Crout : lucr.a
+ - LU Left Looking : lull.a
+ - LU Sivan Toledo's recursive : lurec.a
+ - QR Left Looking : qrll.a
+ - Cholesky Right Looking : cholrl.a
+ - Cholesky Top : choltop.a
+
+
+===========
+= TESTING =
+===========
+
+To test these variants you can type 'make variants-testing'
+This will rerun the linear methods testings once per variants and append the short name of the variants to the output files.
+You should then see the following files in the TESTING directory:
+[scdz]test_cholrl.out
+[scdz]test_choltop.out
+[scdz]test_lucr.out
+[scdz]test_lull.out
+[scdz]test_lurec.out
+[scdz]test_qrll.out
+
+========================
+= LINKING YOUR PROGRAM =
+========================
+
+You just need to add the variants methods library in your linking sequence before your lapack libary.
+Here is a quick example for LU
+
+Default using LU Right Looking version:
+ $(FORTRAN) -c myprog.f
+ $(FORTRAN) -o myexe myprog.o $(LAPACKLIB) $(BLASLIB)
+
+Using LU Left Looking version:
+ $(FORTRAN) -c myprog.f
+ $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS/LIB)/lull.a $(LAPACKLIB) $(BLASLIB)
+
+===========
+= SUPPORT =
+===========
+
+You can use either LAPACK forum or the LAPACK mailing list to get support.
+LAPACK forum : http://icl.cs.utk.edu/lapack-forum
+LAPACK mailing list : lapack@cs.utk.edu
diff --git a/SRC/VARIANTS/cholesky/RL/cpotrf.f b/SRC/VARIANTS/cholesky/RL/cpotrf.f
new file mode 100644
index 00000000..a6d194cb
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/cpotrf.f
@@ -0,0 +1,187 @@
+ SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ COMPLEX A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CPOTRF computes the Cholesky factorization of a real Hermitian
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**H * U, if UPLO = 'U', or
+* A = L * L**H, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the Hermitian matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**H*U or A = L*L**H.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ COMPLEX CONE
+ PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEMM, CPOTF2, CHERK, CTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL CPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose',
+ $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+ $ LDA, A( J, J+JB ), LDA )
+ CALL CHERK( 'Upper', 'Conjugate transpose', N-J-JB+1,
+ $ JB, -ONE, A( J, J+JB ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose',
+ $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+ $ LDA, A( J+JB, J ), LDA )
+
+ CALL CHERK( 'Lower', 'No Transpose', N-J-JB+1, JB,
+ $ -ONE, A( J+JB, J ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of CPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/RL/dpotrf.f b/SRC/VARIANTS/cholesky/RL/dpotrf.f
new file mode 100644
index 00000000..72603304
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/dpotrf.f
@@ -0,0 +1,186 @@
+ SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* DPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**T * U, if UPLO = 'U', or
+* A = L * L**T, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**T*U or A = L*L**T.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ PARAMETER ( ONE = 1.0D+0 )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL DPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ CALL DSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE,
+ $ A( J, J+JB ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+ $ N-J-JB+1, JB, ONE, A( J, J ), LDA,
+ $ A( J+JB, J ), LDA )
+
+ CALL DSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB,
+ $ -ONE, A( J+JB, J ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of DPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/RL/spotrf.f b/SRC/VARIANTS/cholesky/RL/spotrf.f
new file mode 100644
index 00000000..3375902a
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/spotrf.f
@@ -0,0 +1,186 @@
+ SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ REAL A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* SPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**T * U, if UPLO = 'U', or
+* A = L * L**T, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**T*U or A = L*L**T.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ PARAMETER ( ONE = 1.0E+0 )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEMM, SPOTF2, SSYRK, STRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL SPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ CALL SSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE,
+ $ A( J, J+JB ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+ $ N-J-JB+1, JB, ONE, A( J, J ), LDA,
+ $ A( J+JB, J ), LDA )
+
+ CALL SSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB,
+ $ -ONE, A( J+JB, J ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of SPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/RL/zpotrf.f b/SRC/VARIANTS/cholesky/RL/zpotrf.f
new file mode 100644
index 00000000..b2bce7f6
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/zpotrf.f
@@ -0,0 +1,187 @@
+ SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ COMPLEX*16 A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZPOTRF computes the Cholesky factorization of a real Hermitian
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**H * U, if UPLO = 'U', or
+* A = L * L**H, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the Hermitian matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**H*U or A = L*L**H.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ COMPLEX*16 CONE
+ PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL ZPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose',
+ $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+ $ LDA, A( J, J+JB ), LDA )
+ CALL ZHERK( 'Upper', 'Conjugate transpose', N-J-JB+1,
+ $ JB, -ONE, A( J, J+JB ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ JB = MIN( NB, N-J+1 )
+
+ CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ IF( J+JB.LE.N ) THEN
+*
+* Updating the trailing submatrix.
+*
+ CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose',
+ $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+ $ LDA, A( J+JB, J ), LDA )
+
+ CALL ZHERK( 'Lower', 'No Transpose', N-J-JB+1, JB,
+ $ -ONE, A( J+JB, J ), LDA,
+ $ ONE, A( J+JB, J+JB ), LDA )
+ END IF
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of ZPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/TOP/cpotrf.f b/SRC/VARIANTS/cholesky/TOP/cpotrf.f
new file mode 100644
index 00000000..54ae1bb9
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/cpotrf.f
@@ -0,0 +1,181 @@
+ SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ COMPLEX A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**H * U, if UPLO = 'U', or
+* A = L * L**H, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**H*U or A = L*L**H.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ COMPLEX CONE
+ PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEMM, CPOTF2, CHERK, CTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL CPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose',
+ $ 'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA,
+ $ A( 1, J ), LDA )
+
+ CALL CHERK( 'Upper', 'Conjugate Transpose', JB, J-1,
+ $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose',
+ $ 'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA,
+ $ A( J, 1 ), LDA )
+
+ CALL CHERK( 'Lower', 'No Transpose', JB, J-1,
+ $ -ONE, A( J, 1 ), LDA,
+ $ ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of CPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/TOP/dpotrf.f b/SRC/VARIANTS/cholesky/TOP/dpotrf.f
new file mode 100644
index 00000000..8dd2c45b
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/dpotrf.f
@@ -0,0 +1,182 @@
+ SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* DPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**T * U, if UPLO = 'U', or
+* A = L * L**T, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**T*U or A = L*L**T.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ PARAMETER ( ONE = 1.0D+0 )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL DPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+ $ J-1, JB, ONE, A( 1, 1 ), LDA,
+ $ A( 1, J ), LDA )
+
+ CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+ $ A( 1, J ), LDA,
+ $ ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+ $ JB, J-1, ONE, A( 1, 1 ), LDA,
+ $ A( J, 1 ), LDA )
+
+ CALL DSYRK( 'Lower', 'No Transpose', JB, J-1,
+ $ -ONE, A( J, 1 ), LDA,
+ $ ONE, A( J, J ), LDA )
+
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of DPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/TOP/spotrf.f b/SRC/VARIANTS/cholesky/TOP/spotrf.f
new file mode 100644
index 00000000..08b41b48
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/spotrf.f
@@ -0,0 +1,181 @@
+ SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ REAL A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* SPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**T * U, if UPLO = 'U', or
+* A = L * L**T, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**T*U or A = L*L**T.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ PARAMETER ( ONE = 1.0E+0 )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEMM, SPOTF2, SSYRK, STRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL SPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+ $ J-1, JB, ONE, A( 1, 1 ), LDA,
+ $ A( 1, J ), LDA )
+
+ CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+ $ A( 1, J ), LDA,
+ $ ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+ $ JB, J-1, ONE, A( 1, 1 ), LDA,
+ $ A( J, 1 ), LDA )
+
+ CALL SSYRK( 'Lower', 'No Transpose', JB, J-1,
+ $ -ONE, A( J, 1 ), LDA,
+ $ ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of SPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/cholesky/TOP/zpotrf.f b/SRC/VARIANTS/cholesky/TOP/zpotrf.f
new file mode 100644
index 00000000..4eae3d1b
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/zpotrf.f
@@ -0,0 +1,181 @@
+ SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ CHARACTER UPLO
+ INTEGER INFO, LDA, N
+* ..
+* .. Array Arguments ..
+ COMPLEX*16 A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZPOTRF computes the Cholesky factorization of a real symmetric
+* positive definite matrix A.
+*
+* The factorization has the form
+* A = U**H * U, if UPLO = 'U', or
+* A = L * L**H, if UPLO = 'L',
+* where U is an upper triangular matrix and L is lower triangular.
+*
+* This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+* Arguments
+* =========
+*
+* UPLO (input) CHARACTER*1
+* = 'U': Upper triangle of A is stored;
+* = 'L': Lower triangle of A is stored.
+*
+* N (input) INTEGER
+* The order of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the symmetric matrix A. If UPLO = 'U', the leading
+* N-by-N upper triangular part of A contains the upper
+* triangular part of the matrix A, and the strictly lower
+* triangular part of A is not referenced. If UPLO = 'L', the
+* leading N-by-N lower triangular part of A contains the lower
+* triangular part of the matrix A, and the strictly upper
+* triangular part of A is not referenced.
+*
+* On exit, if INFO = 0, the factor U or L from the Cholesky
+* factorization A = U**H*U or A = L*L**H.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,N).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, the leading minor of order i is not
+* positive definite, and the factorization could not be
+* completed.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ COMPLEX*16 CONE
+ PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) )
+* ..
+* .. Local Scalars ..
+ LOGICAL UPPER
+ INTEGER J, JB, NB
+* ..
+* .. External Functions ..
+ LOGICAL LSAME
+ INTEGER ILAENV
+ EXTERNAL LSAME, ILAENV
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ UPPER = LSAME( UPLO, 'U' )
+ IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZPOTRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+* Use unblocked code.
+*
+ CALL ZPOTF2( UPLO, N, A, LDA, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ IF( UPPER ) THEN
+*
+* Compute the Cholesky factorization A = U'*U.
+*
+ DO 10 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose',
+ $ 'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA,
+ $ A( 1, J ), LDA )
+
+ CALL ZHERK( 'Upper', 'Conjugate Transpose', JB, J-1,
+ $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 10 CONTINUE
+*
+ ELSE
+*
+* Compute the Cholesky factorization A = L*L'.
+*
+ DO 20 J = 1, N, NB
+
+ JB = MIN( NB, N-J+1 )
+*
+* Compute the current block.
+*
+ CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose',
+ $ 'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA,
+ $ A( J, 1 ), LDA )
+
+ CALL ZHERK( 'Lower', 'No Transpose', JB, J-1,
+ $ -ONE, A( J, 1 ), LDA,
+ $ ONE, A( J, J ), LDA )
+*
+* Update and factorize the current diagonal block and test
+* for non-positive-definiteness.
+*
+ CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+ IF( INFO.NE.0 )
+ $ GO TO 30
+
+ 20 CONTINUE
+ END IF
+ END IF
+ GO TO 40
+*
+ 30 CONTINUE
+ INFO = INFO + J - 1
+*
+ 40 CONTINUE
+ RETURN
+*
+* End of ZPOTRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/CR/cgetrf.f b/SRC/VARIANTS/lu/CR/cgetrf.f
new file mode 100644
index 00000000..7d6403e1
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/cgetrf.f
@@ -0,0 +1,165 @@
+ SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the Crout Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX ONE
+ PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, MOD
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL CGETF2( M, N, A, LDA, IPIV, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+* Update current block.
+*
+ CALL CGEMM( 'No transpose', 'No transpose',
+ $ M-J+1, JB, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+ $ A( J, J ), LDA )
+
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+* Apply interchanges to column 1:J-1
+*
+ CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+ IF ( J+JB.LE.N ) THEN
+*
+* Apply interchanges to column J+JB:N
+*
+ CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+ $ IPIV, 1 )
+*
+ CALL CGEMM( 'No transpose', 'No transpose',
+ $ JB, N-J-JB+1, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+ $ A( J, J+JB ), LDA )
+*
+* Compute block row of U.
+*
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ END IF
+
+ 20 CONTINUE
+
+ END IF
+ RETURN
+*
+* End of CGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/CR/dgetrf.f b/SRC/VARIANTS/lu/CR/dgetrf.f
new file mode 100644
index 00000000..e1b4121e
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/dgetrf.f
@@ -0,0 +1,165 @@
+ SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ DOUBLE PRECISION A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* DGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the Crout Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ PARAMETER ( ONE = 1.0D+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, MOD
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+* Update current block.
+*
+ CALL DGEMM( 'No transpose', 'No transpose',
+ $ M-J+1, JB, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+ $ A( J, J ), LDA )
+
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+* Apply interchanges to column 1:J-1
+*
+ CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+ IF ( J+JB.LE.N ) THEN
+*
+* Apply interchanges to column J+JB:N
+*
+ CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+ $ IPIV, 1 )
+*
+ CALL DGEMM( 'No transpose', 'No transpose',
+ $ JB, N-J-JB+1, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+ $ A( J, J+JB ), LDA )
+*
+* Compute block row of U.
+*
+ CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ END IF
+
+ 20 CONTINUE
+
+ END IF
+ RETURN
+*
+* End of DGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/CR/sgetrf.f b/SRC/VARIANTS/lu/CR/sgetrf.f
new file mode 100644
index 00000000..238ec119
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/sgetrf.f
@@ -0,0 +1,165 @@
+ SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ REAL A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* SGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the Crout Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ PARAMETER ( ONE = 1.0E+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, MOD
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL SGETF2( M, N, A, LDA, IPIV, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+* Update current block.
+*
+ CALL SGEMM( 'No transpose', 'No transpose',
+ $ M-J+1, JB, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+ $ A( J, J ), LDA )
+
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+* Apply interchanges to column 1:J-1
+*
+ CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+ IF ( J+JB.LE.N ) THEN
+*
+* Apply interchanges to column J+JB:N
+*
+ CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+ $ IPIV, 1 )
+*
+ CALL SGEMM( 'No transpose', 'No transpose',
+ $ JB, N-J-JB+1, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+ $ A( J, J+JB ), LDA )
+*
+* Compute block row of U.
+*
+ CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ END IF
+
+ 20 CONTINUE
+
+ END IF
+ RETURN
+*
+* End of SGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/CR/zgetrf.f b/SRC/VARIANTS/lu/CR/zgetrf.f
new file mode 100644
index 00000000..2dafefbf
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/zgetrf.f
@@ -0,0 +1,165 @@
+ SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX*16 A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the Crout Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX*16 ONE
+ PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, MOD
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL ZGETF2( M, N, A, LDA, IPIV, INFO )
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+* Update current block.
+*
+ CALL ZGEMM( 'No transpose', 'No transpose',
+ $ M-J+1, JB, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+ $ A( J, J ), LDA )
+
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+* Apply interchanges to column 1:J-1
+*
+ CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+ IF ( J+JB.LE.N ) THEN
+*
+* Apply interchanges to column J+JB:N
+*
+ CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+ $ IPIV, 1 )
+*
+ CALL ZGEMM( 'No transpose', 'No transpose',
+ $ JB, N-J-JB+1, J-1, -ONE,
+ $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+ $ A( J, J+JB ), LDA )
+*
+* Compute block row of U.
+*
+ CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-J-JB+1, ONE, A( J, J ), LDA,
+ $ A( J, J+JB ), LDA )
+ END IF
+
+ 20 CONTINUE
+
+ END IF
+ RETURN
+*
+* End of ZGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/LL/cgetrf.f b/SRC/VARIANTS/lu/LL/cgetrf.f
new file mode 100644
index 00000000..189362b0
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/cgetrf.f
@@ -0,0 +1,190 @@
+ SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX ONE
+ PARAMETER ( ONE = (1.0E+0, 0.0E+0) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, K, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL CGETF2( M, N, A, LDA, IPIV, INFO )
+
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+* Update before factoring the current panel
+*
+ DO 30 K = 1, J-NB, NB
+*
+* Apply interchanges to rows K:K+NB-1.
+*
+ CALL CLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+* Compute block row of U.
+*
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ NB, JB, ONE, A( K, K ), LDA,
+ $ A( K, J ), LDA )
+*
+* Update trailing submatrix.
+*
+ CALL CGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, JB, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+ $ A( K+NB, J ), LDA )
+ 30 CONTINUE
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+ 20 CONTINUE
+
+*
+* Apply interchanges to the left-overs
+*
+ DO 40 K = 1, MIN( M, N ), NB
+ CALL CLASWP( K-1, A( 1, 1 ), LDA, K,
+ $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+ 40 CONTINUE
+*
+* Apply update to the M+1:N columns when N > M
+*
+ IF ( N.GT.M ) THEN
+
+ CALL CLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+ DO 50 K = 1, M, NB
+
+ JB = MIN( M-K+1, NB )
+*
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-M, ONE, A( K, K ), LDA,
+ $ A( K, M+1 ), LDA )
+
+*
+ IF ( K+NB.LE.M ) THEN
+ CALL CGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, N-M, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+ $ A( K+NB, M+1 ), LDA )
+ END IF
+ 50 CONTINUE
+ END IF
+*
+ END IF
+ RETURN
+*
+* End of CGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/LL/dgetrf.f b/SRC/VARIANTS/lu/LL/dgetrf.f
new file mode 100644
index 00000000..8231805c
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/dgetrf.f
@@ -0,0 +1,189 @@
+ SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ DOUBLE PRECISION A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* DGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE
+ PARAMETER ( ONE = 1.0D+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, K, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+* Update before factoring the current panel
+*
+ DO 30 K = 1, J-NB, NB
+*
+* Apply interchanges to rows K:K+NB-1.
+*
+ CALL DLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+* Compute block row of U.
+*
+ CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ NB, JB, ONE, A( K, K ), LDA,
+ $ A( K, J ), LDA )
+*
+* Update trailing submatrix.
+*
+ CALL DGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, JB, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+ $ A( K+NB, J ), LDA )
+ 30 CONTINUE
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+ 20 CONTINUE
+
+*
+* Apply interchanges to the left-overs
+*
+ DO 40 K = 1, MIN( M, N ), NB
+ CALL DLASWP( K-1, A( 1, 1 ), LDA, K,
+ $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+ 40 CONTINUE
+*
+* Apply update to the M+1:N columns when N > M
+*
+ IF ( N.GT.M ) THEN
+
+ CALL DLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+ DO 50 K = 1, M, NB
+
+ JB = MIN( M-K+1, NB )
+*
+ CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-M, ONE, A( K, K ), LDA,
+ $ A( K, M+1 ), LDA )
+
+*
+ IF ( K+NB.LE.M ) THEN
+ CALL DGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, N-M, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+ $ A( K+NB, M+1 ), LDA )
+ END IF
+ 50 CONTINUE
+ END IF
+*
+ END IF
+ RETURN
+*
+* End of DGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/LL/sgetrf.f b/SRC/VARIANTS/lu/LL/sgetrf.f
new file mode 100644
index 00000000..856c1a7e
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/sgetrf.f
@@ -0,0 +1,190 @@
+ SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ REAL A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* SGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE
+ PARAMETER ( ONE = 1.0E+0 )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, K, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL SGETF2( M, N, A, LDA, IPIV, INFO )
+
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+* Update before factoring the current panel
+*
+ DO 30 K = 1, J-NB, NB
+*
+* Apply interchanges to rows K:K+NB-1.
+*
+ CALL SLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+* Compute block row of U.
+*
+ CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ NB, JB, ONE, A( K, K ), LDA,
+ $ A( K, J ), LDA )
+*
+* Update trailing submatrix.
+*
+ CALL SGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, JB, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+ $ A( K+NB, J ), LDA )
+ 30 CONTINUE
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+ 20 CONTINUE
+
+*
+* Apply interchanges to the left-overs
+*
+ DO 40 K = 1, MIN( M, N ), NB
+ CALL SLASWP( K-1, A( 1, 1 ), LDA, K,
+ $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+ 40 CONTINUE
+*
+* Apply update to the M+1:N columns when N > M
+*
+ IF ( N.GT.M ) THEN
+
+ CALL SLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+ DO 50 K = 1, M, NB
+
+ JB = MIN( M-K+1, NB )
+*
+ CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-M, ONE, A( K, K ), LDA,
+ $ A( K, M+1 ), LDA )
+
+*
+ IF ( K+NB.LE.M ) THEN
+ CALL SGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, N-M, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+ $ A( K+NB, M+1 ), LDA )
+ END IF
+ 50 CONTINUE
+ END IF
+*
+ END IF
+ RETURN
+*
+* End of SGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/LL/zgetrf.f b/SRC/VARIANTS/lu/LL/zgetrf.f
new file mode 100644
index 00000000..a6f9c0ff
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/zgetrf.f
@@ -0,0 +1,190 @@
+ SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX*16 A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX*16 ONE
+ PARAMETER ( ONE = (1.0D+0, 0.0D+0) )
+* ..
+* .. Local Scalars ..
+ INTEGER I, IINFO, J, JB, K, NB
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ EXTERNAL ILAENV
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Determine the block size for this environment.
+*
+ NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 )
+ IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+* Use unblocked code.
+*
+ CALL ZGETF2( M, N, A, LDA, IPIV, INFO )
+
+ ELSE
+*
+* Use blocked code.
+*
+ DO 20 J = 1, MIN( M, N ), NB
+ JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+* Update before factoring the current panel
+*
+ DO 30 K = 1, J-NB, NB
+*
+* Apply interchanges to rows K:K+NB-1.
+*
+ CALL ZLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+* Compute block row of U.
+*
+ CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ NB, JB, ONE, A( K, K ), LDA,
+ $ A( K, J ), LDA )
+*
+* Update trailing submatrix.
+*
+ CALL ZGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, JB, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+ $ A( K+NB, J ), LDA )
+ 30 CONTINUE
+*
+* Factor diagonal and subdiagonal blocks and test for exact
+* singularity.
+*
+ CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+* Adjust INFO and the pivot indices.
+*
+ IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+ $ INFO = IINFO + J - 1
+ DO 10 I = J, MIN( M, J+JB-1 )
+ IPIV( I ) = J - 1 + IPIV( I )
+ 10 CONTINUE
+*
+ 20 CONTINUE
+
+*
+* Apply interchanges to the left-overs
+*
+ DO 40 K = 1, MIN( M, N ), NB
+ CALL ZLASWP( K-1, A( 1, 1 ), LDA, K,
+ $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+ 40 CONTINUE
+*
+* Apply update to the M+1:N columns when N > M
+*
+ IF ( N.GT.M ) THEN
+
+ CALL ZLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+ DO 50 K = 1, M, NB
+
+ JB = MIN( M-K+1, NB )
+*
+ CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+ $ JB, N-M, ONE, A( K, K ), LDA,
+ $ A( K, M+1 ), LDA )
+
+*
+ IF ( K+NB.LE.M ) THEN
+ CALL ZGEMM( 'No transpose', 'No transpose',
+ $ M-K-NB+1, N-M, NB, -ONE,
+ $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+ $ A( K+NB, M+1 ), LDA )
+ END IF
+ 50 CONTINUE
+ END IF
+*
+ END IF
+ RETURN
+*
+* End of ZGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/REC/cgetrf.f b/SRC/VARIANTS/lu/REC/cgetrf.f
new file mode 100644
index 00000000..d8a8a90b
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/cgetrf.f
@@ -0,0 +1,224 @@
+ SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
+ IMPLICIT NONE
+*
+* -- LAPACK routine (version 3.X) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* May 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* CGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This code implements an iterative version of Sivan Toledo's recursive
+* LU algorithm[1]. For square matrices, this iterative versions should
+* be within a factor of two of the optimum number of memory transfers.
+*
+* The pattern is as follows, with the large blocks of U being updated
+* in one call to DTRSM, and the dotted lines denoting sections that
+* have had all pending permutations applied:
+*
+* 1 2 3 4 5 6 7 8
+* +-+-+---+-------+------
+* | |1| | |
+* |.+-+ 2 | |
+* | | | | |
+* |.|.+-+-+ 4 |
+* | | | |1| |
+* | | |.+-+ |
+* | | | | | |
+* |.|.|.|.+-+-+---+ 8
+* | | | | | |1| |
+* | | | | |.+-+ 2 |
+* | | | | | | | |
+* | | | | |.|.+-+-+
+* | | | | | | | |1|
+* | | | | | | |.+-+
+* | | | | | | | | |
+* |.|.|.|.|.|.|.|.+-----
+* | | | | | | | | |
+*
+* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+* the binary expansion of the current column. Each Schur update is
+* applied as soon as the necessary portion of U is available.
+*
+* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX ONE, NEGONE
+ REAL ZERO
+ PARAMETER ( ONE = (1.0E+0, 0.0E+0) )
+ PARAMETER ( NEGONE = (-1.0E+0, 0.0E+0) )
+ PARAMETER ( ZERO = 0.0E+0 )
+* ..
+* .. Local Scalars ..
+ REAL SFMIN, PIVMAG
+ COMPLEX TMP
+ INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+ INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
+* ..
+* .. External Functions ..
+ REAL SLAMCH
+ INTEGER ICAMAX
+ LOGICAL SISNAN
+ EXTERNAL SLAMCH, ICAMAX, SISNAN
+* ..
+* .. External Subroutines ..
+ EXTERNAL CTRSM, CSCAL, XERBLA, CLASWP
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, IAND, ABS
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Compute machine safe minimum
+*
+ SFMIN = SLAMCH( 'S' )
+*
+ NSTEP = MIN( M, N )
+ DO J = 1, NSTEP
+ KAHEAD = IAND( J, -J )
+ KSTART = J + 1 - KAHEAD
+ KCOLS = MIN( KAHEAD, M-J )
+*
+* Find pivot.
+*
+ JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 )
+ IPIV( J ) = JP
+
+! Permute just this column.
+ IF (JP .NE. J) THEN
+ TMP = A( J, J )
+ A( J, J ) = A( JP, J )
+ A( JP, J ) = TMP
+ END IF
+
+! Apply pending permutations to L
+ NTOPIV = 1
+ IPIVSTART = J
+ JPIVSTART = J - NTOPIV
+ DO WHILE ( NTOPIV .LT. KAHEAD )
+ CALL CLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+ $ IPIV, 1 )
+ IPIVSTART = IPIVSTART - NTOPIV;
+ NTOPIV = NTOPIV * 2;
+ JPIVSTART = JPIVSTART - NTOPIV;
+ END DO
+
+! Permute U block to match L
+ CALL CLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+! Factor the current column
+ PIVMAG = ABS( A( J, J ) )
+ IF( PIVMAG.NE.ZERO .AND. .NOT.SISNAN( PIVMAG ) ) THEN
+ IF( PIVMAG .GE. SFMIN ) THEN
+ CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+ ELSE
+ DO I = 1, M-J
+ A( J+I, J ) = A( J+I, J ) / A( J, J )
+ END DO
+ END IF
+ ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+ INFO = J
+ END IF
+
+! Solve for U block.
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+ $ KCOLS, ONE, A( KSTART, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA )
+! Schur complement.
+ CALL CGEMM( 'No transpose', 'No transpose', M-J,
+ $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+ END DO
+
+! Handle pivot permutations on the way out of the recursion
+ NPIVED = IAND( NSTEP, -NSTEP )
+ J = NSTEP - NPIVED
+ DO WHILE ( J .GT. 0 )
+ NTOPIV = IAND( J, -J )
+ CALL CLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+ $ IPIV, 1 )
+ J = J - NTOPIV
+ END DO
+
+! If short and wide, handle the rest of the columns.
+ IF ( M .LT. N ) THEN
+ CALL CLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+ CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+ $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+ END IF
+
+ RETURN
+*
+* End of CGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/REC/dgetrf.f b/SRC/VARIANTS/lu/REC/dgetrf.f
new file mode 100644
index 00000000..f8c2caf1
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/dgetrf.f
@@ -0,0 +1,220 @@
+ SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
+ IMPLICIT NONE
+*
+* -- LAPACK routine (version 3.X) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* May 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ DOUBLE PRECISION A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* DGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This code implements an iterative version of Sivan Toledo's recursive
+* LU algorithm[1]. For square matrices, this iterative versions should
+* be within a factor of two of the optimum number of memory transfers.
+*
+* The pattern is as follows, with the large blocks of U being updated
+* in one call to DTRSM, and the dotted lines denoting sections that
+* have had all pending permutations applied:
+*
+* 1 2 3 4 5 6 7 8
+* +-+-+---+-------+------
+* | |1| | |
+* |.+-+ 2 | |
+* | | | | |
+* |.|.+-+-+ 4 |
+* | | | |1| |
+* | | |.+-+ |
+* | | | | | |
+* |.|.|.|.+-+-+---+ 8
+* | | | | | |1| |
+* | | | | |.+-+ 2 |
+* | | | | | | | |
+* | | | | |.|.+-+-+
+* | | | | | | | |1|
+* | | | | | | |.+-+
+* | | | | | | | | |
+* |.|.|.|.|.|.|.|.+-----
+* | | | | | | | | |
+*
+* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+* the binary expansion of the current column. Each Schur update is
+* applied as soon as the necessary portion of U is available.
+*
+* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ DOUBLE PRECISION ONE, ZERO, NEGONE
+ PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+ PARAMETER ( NEGONE = -1.0D+0 )
+* ..
+* .. Local Scalars ..
+ DOUBLE PRECISION SFMIN, TMP
+ INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+ INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
+* ..
+* .. External Functions ..
+ DOUBLE PRECISION DLAMCH
+ INTEGER IDAMAX
+ LOGICAL DISNAN
+ EXTERNAL DLAMCH, IDAMAX, DISNAN
+* ..
+* .. External Subroutines ..
+ EXTERNAL DTRSM, DSCAL, XERBLA, DLASWP
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, IAND
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Compute machine safe minimum
+*
+ SFMIN = DLAMCH( 'S' )
+*
+ NSTEP = MIN( M, N )
+ DO J = 1, NSTEP
+ KAHEAD = IAND( J, -J )
+ KSTART = J + 1 - KAHEAD
+ KCOLS = MIN( KAHEAD, M-J )
+*
+* Find pivot.
+*
+ JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
+ IPIV( J ) = JP
+
+! Permute just this column.
+ IF (JP .NE. J) THEN
+ TMP = A( J, J )
+ A( J, J ) = A( JP, J )
+ A( JP, J ) = TMP
+ END IF
+
+! Apply pending permutations to L
+ NTOPIV = 1
+ IPIVSTART = J
+ JPIVSTART = J - NTOPIV
+ DO WHILE ( NTOPIV .LT. KAHEAD )
+ CALL DLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+ $ IPIV, 1 )
+ IPIVSTART = IPIVSTART - NTOPIV;
+ NTOPIV = NTOPIV * 2;
+ JPIVSTART = JPIVSTART - NTOPIV;
+ END DO
+
+! Permute U block to match L
+ CALL DLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+! Factor the current column
+ IF( A( J, J ).NE.ZERO .AND. .NOT.DISNAN( A( J, J ) ) ) THEN
+ IF( ABS(A( J, J )) .GE. SFMIN ) THEN
+ CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+ ELSE
+ DO I = 1, M-J
+ A( J+I, J ) = A( J+I, J ) / A( J, J )
+ END DO
+ END IF
+ ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+ INFO = J
+ END IF
+
+! Solve for U block.
+ CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+ $ KCOLS, ONE, A( KSTART, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA )
+! Schur complement.
+ CALL DGEMM( 'No transpose', 'No transpose', M-J,
+ $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+ END DO
+
+! Handle pivot permutations on the way out of the recursion
+ NPIVED = IAND( NSTEP, -NSTEP )
+ J = NSTEP - NPIVED
+ DO WHILE ( J .GT. 0 )
+ NTOPIV = IAND( J, -J )
+ CALL DLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+ $ IPIV, 1 )
+ J = J - NTOPIV
+ END DO
+
+! If short and wide, handle the rest of the columns.
+ IF ( M .LT. N ) THEN
+ CALL DLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+ CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+ $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+ END IF
+
+ RETURN
+*
+* End of DGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/REC/sgetrf.f b/SRC/VARIANTS/lu/REC/sgetrf.f
new file mode 100644
index 00000000..1890f987
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/sgetrf.f
@@ -0,0 +1,220 @@
+ SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )
+ IMPLICIT NONE
+*
+* -- LAPACK routine (version 3.X) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* May 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ REAL A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* SGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This code implements an iterative version of Sivan Toledo's recursive
+* LU algorithm[1]. For square matrices, this iterative versions should
+* be within a factor of two of the optimum number of memory transfers.
+*
+* The pattern is as follows, with the large blocks of U being updated
+* in one call to STRSM, and the dotted lines denoting sections that
+* have had all pending permutations applied:
+*
+* 1 2 3 4 5 6 7 8
+* +-+-+---+-------+------
+* | |1| | |
+* |.+-+ 2 | |
+* | | | | |
+* |.|.+-+-+ 4 |
+* | | | |1| |
+* | | |.+-+ |
+* | | | | | |
+* |.|.|.|.+-+-+---+ 8
+* | | | | | |1| |
+* | | | | |.+-+ 2 |
+* | | | | | | | |
+* | | | | |.|.+-+-+
+* | | | | | | | |1|
+* | | | | | | |.+-+
+* | | | | | | | | |
+* |.|.|.|.|.|.|.|.+-----
+* | | | | | | | | |
+*
+* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+* the binary expansion of the current column. Each Schur update is
+* applied as soon as the necessary portion of U is available.
+*
+* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ REAL ONE, ZERO, NEGONE
+ PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+ PARAMETER ( NEGONE = -1.0E+0 )
+* ..
+* .. Local Scalars ..
+ REAL SFMIN, TMP
+ INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+ INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
+* ..
+* .. External Functions ..
+ REAL SLAMCH
+ INTEGER ISAMAX
+ LOGICAL SISNAN
+ EXTERNAL SLAMCH, ISAMAX, SISNAN
+* ..
+* .. External Subroutines ..
+ EXTERNAL STRSM, SSCAL, XERBLA, SLASWP
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, IAND
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Compute machine safe minimum
+*
+ SFMIN = SLAMCH( 'S' )
+*
+ NSTEP = MIN( M, N )
+ DO J = 1, NSTEP
+ KAHEAD = IAND( J, -J )
+ KSTART = J + 1 - KAHEAD
+ KCOLS = MIN( KAHEAD, M-J )
+*
+* Find pivot.
+*
+ JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 )
+ IPIV( J ) = JP
+
+! Permute just this column.
+ IF (JP .NE. J) THEN
+ TMP = A( J, J )
+ A( J, J ) = A( JP, J )
+ A( JP, J ) = TMP
+ END IF
+
+! Apply pending permutations to L
+ NTOPIV = 1
+ IPIVSTART = J
+ JPIVSTART = J - NTOPIV
+ DO WHILE ( NTOPIV .LT. KAHEAD )
+ CALL SLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+ $ IPIV, 1 )
+ IPIVSTART = IPIVSTART - NTOPIV;
+ NTOPIV = NTOPIV * 2;
+ JPIVSTART = JPIVSTART - NTOPIV;
+ END DO
+
+! Permute U block to match L
+ CALL SLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+! Factor the current column
+ IF( A( J, J ).NE.ZERO .AND. .NOT.SISNAN( A( J, J ) ) ) THEN
+ IF( ABS(A( J, J )) .GE. SFMIN ) THEN
+ CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+ ELSE
+ DO I = 1, M-J
+ A( J+I, J ) = A( J+I, J ) / A( J, J )
+ END DO
+ END IF
+ ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+ INFO = J
+ END IF
+
+! Solve for U block.
+ CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+ $ KCOLS, ONE, A( KSTART, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA )
+! Schur complement.
+ CALL SGEMM( 'No transpose', 'No transpose', M-J,
+ $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+ END DO
+
+! Handle pivot permutations on the way out of the recursion
+ NPIVED = IAND( NSTEP, -NSTEP )
+ J = NSTEP - NPIVED
+ DO WHILE ( J .GT. 0 )
+ NTOPIV = IAND( J, -J )
+ CALL SLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+ $ IPIV, 1 )
+ J = J - NTOPIV
+ END DO
+
+! If short and wide, handle the rest of the columns.
+ IF ( M .LT. N ) THEN
+ CALL SLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+ CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+ $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+ END IF
+
+ RETURN
+*
+* End of SGETRF
+*
+ END
diff --git a/SRC/VARIANTS/lu/REC/zgetrf.f b/SRC/VARIANTS/lu/REC/zgetrf.f
new file mode 100644
index 00000000..e7b75b00
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/zgetrf.f
@@ -0,0 +1,224 @@
+ SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO )
+ IMPLICIT NONE
+*
+* -- LAPACK routine (version 3.X) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* May 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, M, N
+* ..
+* .. Array Arguments ..
+ INTEGER IPIV( * )
+ COMPLEX*16 A( LDA, * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZGETRF computes an LU factorization of a general M-by-N matrix A
+* using partial pivoting with row interchanges.
+*
+* The factorization has the form
+* A = P * L * U
+* where P is a permutation matrix, L is lower triangular with unit
+* diagonal elements (lower trapezoidal if m > n), and U is upper
+* triangular (upper trapezoidal if m < n).
+*
+* This code implements an iterative version of Sivan Toledo's recursive
+* LU algorithm[1]. For square matrices, this iterative versions should
+* be within a factor of two of the optimum number of memory transfers.
+*
+* The pattern is as follows, with the large blocks of U being updated
+* in one call to DTRSM, and the dotted lines denoting sections that
+* have had all pending permutations applied:
+*
+* 1 2 3 4 5 6 7 8
+* +-+-+---+-------+------
+* | |1| | |
+* |.+-+ 2 | |
+* | | | | |
+* |.|.+-+-+ 4 |
+* | | | |1| |
+* | | |.+-+ |
+* | | | | | |
+* |.|.|.|.+-+-+---+ 8
+* | | | | | |1| |
+* | | | | |.+-+ 2 |
+* | | | | | | | |
+* | | | | |.|.+-+-+
+* | | | | | | | |1|
+* | | | | | | |.+-+
+* | | | | | | | | |
+* |.|.|.|.|.|.|.|.+-----
+* | | | | | | | | |
+*
+* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+* the binary expansion of the current column. Each Schur update is
+* applied as soon as the necessary portion of U is available.
+*
+* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the M-by-N matrix to be factored.
+* On exit, the factors L and U from the factorization
+* A = P*L*U; the unit diagonal elements of L are not stored.
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* IPIV (output) INTEGER array, dimension (min(M,N))
+* The pivot indices; for 1 <= i <= min(M,N), row i of the
+* matrix was interchanged with row IPIV(i).
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+* > 0: if INFO = i, U(i,i) is exactly zero. The factorization
+* has been completed, but the factor U is exactly
+* singular, and division by zero will occur if it is used
+* to solve a system of equations.
+*
+* =====================================================================
+*
+* .. Parameters ..
+ COMPLEX*16 ONE, NEGONE
+ DOUBLE PRECISION ZERO
+ PARAMETER ( ONE = (1.0D+0, 0.0D+0) )
+ PARAMETER ( NEGONE = (-1.0D+0, 0.0D+0) )
+ PARAMETER ( ZERO = 0.0D+0 )
+* ..
+* .. Local Scalars ..
+ DOUBLE PRECISION SFMIN, PIVMAG
+ COMPLEX*16 TMP
+ INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+ INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
+* ..
+* .. External Functions ..
+ DOUBLE PRECISION DLAMCH
+ INTEGER IZAMAX
+ LOGICAL DISNAN
+ EXTERNAL DLAMCH, IZAMAX, DISNAN
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZTRSM, ZSCAL, XERBLA, ZLASWP
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN, IAND, ABS
+* ..
+* .. Executable Statements ..
+*
+* Test the input parameters.
+*
+ INFO = 0
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZGETRF', -INFO )
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( M.EQ.0 .OR. N.EQ.0 )
+ $ RETURN
+*
+* Compute machine safe minimum
+*
+ SFMIN = DLAMCH( 'S' )
+*
+ NSTEP = MIN( M, N )
+ DO J = 1, NSTEP
+ KAHEAD = IAND( J, -J )
+ KSTART = J + 1 - KAHEAD
+ KCOLS = MIN( KAHEAD, M-J )
+*
+* Find pivot.
+*
+ JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 )
+ IPIV( J ) = JP
+
+! Permute just this column.
+ IF (JP .NE. J) THEN
+ TMP = A( J, J )
+ A( J, J ) = A( JP, J )
+ A( JP, J ) = TMP
+ END IF
+
+! Apply pending permutations to L
+ NTOPIV = 1
+ IPIVSTART = J
+ JPIVSTART = J - NTOPIV
+ DO WHILE ( NTOPIV .LT. KAHEAD )
+ CALL ZLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+ $ IPIV, 1 )
+ IPIVSTART = IPIVSTART - NTOPIV;
+ NTOPIV = NTOPIV * 2;
+ JPIVSTART = JPIVSTART - NTOPIV;
+ END DO
+
+! Permute U block to match L
+ CALL ZLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+! Factor the current column
+ PIVMAG = ABS( A( J, J ) )
+ IF( PIVMAG.NE.ZERO .AND. .NOT.DISNAN( PIVMAG ) ) THEN
+ IF( PIVMAG .GE. SFMIN ) THEN
+ CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+ ELSE
+ DO I = 1, M-J
+ A( J+I, J ) = A( J+I, J ) / A( J, J )
+ END DO
+ END IF
+ ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+ INFO = J
+ END IF
+
+! Solve for U block.
+ CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+ $ KCOLS, ONE, A( KSTART, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA )
+! Schur complement.
+ CALL ZGEMM( 'No transpose', 'No transpose', M-J,
+ $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+ $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+ END DO
+
+! Handle pivot permutations on the way out of the recursion
+ NPIVED = IAND( NSTEP, -NSTEP )
+ J = NSTEP - NPIVED
+ DO WHILE ( J .GT. 0 )
+ NTOPIV = IAND( J, -J )
+ CALL ZLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+ $ IPIV, 1 )
+ J = J - NTOPIV
+ END DO
+
+! If short and wide, handle the rest of the columns.
+ IF ( M .LT. N ) THEN
+ CALL ZLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+ CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+ $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+ END IF
+
+ RETURN
+*
+* End of ZGETRF
+*
+ END
diff --git a/SRC/VARIANTS/qr/LL/cgeqrf.f b/SRC/VARIANTS/qr/LL/cgeqrf.f
new file mode 100644
index 00000000..413bc90c
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/cgeqrf.f
@@ -0,0 +1,343 @@
+ SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, LWORK, M, N
+* ..
+* .. Array Arguments ..
+ COMPLEX A( LDA, * ), TAU( * ), WORK( * )
+* ..
+*
+* Purpose
+* =======
+*
+* CGEQRF computes a QR factorization of a real M-by-N matrix A:
+* A = Q * R.
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX array, dimension (LDA,N)
+* On entry, the M-by-N matrix A.
+* On exit, the elements on and above the diagonal of the array
+* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+* upper triangular if m >= n); the elements below the diagonal,
+* with the array TAU, represent the orthogonal matrix Q as a
+* product of min(m,n) elementary reflectors (see Further
+* Details).
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* TAU (output) COMPLEX array, dimension (min(M,N))
+* The scalar factors of the elementary reflectors (see Further
+* Details).
+*
+* WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
+* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+* LWORK (input) INTEGER
+*
+* The dimension of the array WORK. The dimension can be divided into three parts.
+*
+* 1) The part for the triangular factor T. If the very last T is not bigger
+* than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
+* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
+*
+* 2) The part for the very last T when T is bigger than any of the rest T.
+* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+* where K = min(M,N), NX is calculated by
+* NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) )
+*
+* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+* So LWORK = part1 + part2 + part3
+*
+* If LWORK = -1, then a workspace query is assumed; the routine
+* only calculates the optimal size of the WORK array, returns
+* this value as the first entry of the WORK array, and no error
+* message related to LWORK is issued by XERBLA.
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+*
+* Further Details
+* ===============
+*
+* The matrix Q is represented as a product of elementary reflectors
+*
+* Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+* Each H(i) has the form
+*
+* H(i) = I - tau * v * v'
+*
+* where tau is a real scalar, and v is a real vector with
+* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+* and tau in TAU(i).
+*
+* =====================================================================
+*
+* .. Local Scalars ..
+ LOGICAL LQUERY
+ INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+ $ NBMIN, NX, LBWORK, NT, LLWORK
+* ..
+* .. External Subroutines ..
+ EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ REAL SCEIL
+ EXTERNAL ILAENV, SCEIL
+* ..
+* .. Executable Statements ..
+
+ INFO = 0
+ NBMIN = 2
+ NX = 0
+ IWS = N
+ K = MIN( M, N )
+ NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+* Determine when to cross over from blocked to unblocked code.
+*
+ NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) )
+ END IF
+*
+* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+* NB=3 2NB=6 K=10
+* | | |
+* 1--2--3--4--5--6--7--8--9--10
+* | \________/
+* K-NX=5 NT=4
+*
+* So here 4 x 4 is the last T stored in the workspace
+*
+ NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+* optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+ LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+ LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+ IF ( NT.GT.NB ) THEN
+
+ LBWORK = K-NT
+*
+* Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+ LWKOPT = (LBWORK+LLWORK)*NB
+ WORK( 1 ) = (LWKOPT+NT*NT)
+
+ ELSE
+
+ LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+ LWKOPT = (LBWORK+LLWORK-NB)*NB
+ WORK( 1 ) = LWKOPT
+
+ END IF
+
+*
+* Test the input arguments
+*
+ LQUERY = ( LWORK.EQ.-1 )
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+ INFO = -7
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'CGEQRF', -INFO )
+ RETURN
+ ELSE IF( LQUERY ) THEN
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( K.EQ.0 ) THEN
+ WORK( 1 ) = 1
+ RETURN
+ END IF
+*
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+ IF( NX.LT.K ) THEN
+*
+* Determine if workspace is large enough for blocked code.
+*
+ IF ( NT.LE.NB ) THEN
+ IWS = (LBWORK+LLWORK-NB)*NB
+ ELSE
+ IWS = (LBWORK+LLWORK)*NB+NT*NT
+ END IF
+
+ IF( LWORK.LT.IWS ) THEN
+*
+* Not enough workspace to use optimal NB: reduce NB and
+* determine the minimum value of NB.
+*
+ IF ( NT.LE.NB ) THEN
+ NB = LWORK / (LLWORK+(LBWORK-NB))
+ ELSE
+ NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+ END IF
+
+ NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1,
+ $ -1 ) )
+ END IF
+ END IF
+ END IF
+*
+ IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+* Use blocked code initially
+*
+ DO 10 I = 1, K - NX, NB
+ IB = MIN( K-I+1, NB )
+*
+* Update the current column using old T's
+*
+ DO 20 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:I+IB-1) from the left
+*
+ CALL CLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, IB, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ IB)
+
+20 CONTINUE
+*
+* Compute the QR factorization of the current block
+* A(I:M,I:I+IB-1)
+*
+ CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+ IF( I+IB.LE.N ) THEN
+*
+* Form the triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(I), LBWORK )
+*
+ END IF
+ 10 CONTINUE
+ ELSE
+ I = 1
+ END IF
+*
+* Use unblocked code to factor the last or only block.
+*
+ IF( I.LE.K ) THEN
+
+ IF ( I .NE. 1 ) THEN
+
+ DO 30 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:K) from the left
+*
+ CALL CLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, K-I+1, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ K-I+1)
+30 CONTINUE
+
+ CALL CGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+ ELSE
+*
+* Use unblocked code to factor the last or only block.
+*
+ CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK,IINFO )
+
+ END IF
+ END IF
+
+
+*
+* Apply update to the column M+1:N when N > M
+*
+ IF ( M.LT.N .AND. I.NE.1) THEN
+*
+* Form the last triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ IF ( NT .LE. NB ) THEN
+ CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+ ELSE
+ CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+1), NT )
+ END IF
+
+*
+* Apply H' to A(1:M,M+1:N) from the left
+*
+ DO 40 J = 1, K-NX, NB
+
+ IB = MIN( K-J+1, NB )
+
+ CALL CLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, IB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+
+40 CONTINUE
+
+ IF ( NT.LE.NB ) THEN
+ CALL CLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ ELSE
+ CALL CLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA,
+ $ WORK(LBWORK*NB+1),
+ $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ END IF
+
+ END IF
+
+ WORK( 1 ) = IWS
+ RETURN
+*
+* End of CGEQRF
+*
+ END
diff --git a/SRC/VARIANTS/qr/LL/dgeqrf.f b/SRC/VARIANTS/qr/LL/dgeqrf.f
new file mode 100644
index 00000000..728ea1b8
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/dgeqrf.f
@@ -0,0 +1,344 @@
+ SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, LWORK, M, N
+* ..
+* .. Array Arguments ..
+ DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
+* ..
+*
+* Purpose
+* =======
+*
+* DGEQRF computes a QR factorization of a real M-by-N matrix A:
+* A = Q * R.
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+* On entry, the M-by-N matrix A.
+* On exit, the elements on and above the diagonal of the array
+* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+* upper triangular if m >= n); the elements below the diagonal,
+* with the array TAU, represent the orthogonal matrix Q as a
+* product of min(m,n) elementary reflectors (see Further
+* Details).
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
+* The scalar factors of the elementary reflectors (see Further
+* Details).
+*
+* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+* LWORK (input) INTEGER
+*
+* The dimension of the array WORK. The dimension can be divided into three parts.
+*
+* 1) The part for the triangular factor T. If the very last T is not bigger
+* than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
+* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
+*
+* 2) The part for the very last T when T is bigger than any of the rest T.
+* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+* where K = min(M,N), NX is calculated by
+* NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
+*
+* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+* So LWORK = part1 + part2 + part3
+*
+* If LWORK = -1, then a workspace query is assumed; the routine
+* only calculates the optimal size of the WORK array, returns
+* this value as the first entry of the WORK array, and no error
+* message related to LWORK is issued by XERBLA.
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+*
+* Further Details
+* ===============
+*
+* The matrix Q is represented as a product of elementary reflectors
+*
+* Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+* Each H(i) has the form
+*
+* H(i) = I - tau * v * v'
+*
+* where tau is a real scalar, and v is a real vector with
+* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+* and tau in TAU(i).
+*
+* =====================================================================
+*
+* .. Local Scalars ..
+ LOGICAL LQUERY
+ INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+ $ NBMIN, NX, LBWORK, NT, LLWORK
+* ..
+* .. External Subroutines ..
+ EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ REAL SCEIL
+ EXTERNAL ILAENV, SCEIL
+* ..
+* .. Executable Statements ..
+
+ INFO = 0
+ NBMIN = 2
+ NX = 0
+ IWS = N
+ K = MIN( M, N )
+ NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+* Determine when to cross over from blocked to unblocked code.
+*
+ NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
+ END IF
+*
+* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+* NB=3 2NB=6 K=10
+* | | |
+* 1--2--3--4--5--6--7--8--9--10
+* | \________/
+* K-NX=5 NT=4
+*
+* So here 4 x 4 is the last T stored in the workspace
+*
+ NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+* optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+ LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+ LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+ IF ( NT.GT.NB ) THEN
+
+ LBWORK = K-NT
+*
+* Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+ LWKOPT = (LBWORK+LLWORK)*NB
+ WORK( 1 ) = (LWKOPT+NT*NT)
+
+ ELSE
+
+ LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+ LWKOPT = (LBWORK+LLWORK-NB)*NB
+ WORK( 1 ) = LWKOPT
+
+ END IF
+
+*
+* Test the input arguments
+*
+ LQUERY = ( LWORK.EQ.-1 )
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+ INFO = -7
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'DGEQRF', -INFO )
+ RETURN
+ ELSE IF( LQUERY ) THEN
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( K.EQ.0 ) THEN
+ WORK( 1 ) = 1
+ RETURN
+ END IF
+*
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+ IF( NX.LT.K ) THEN
+*
+* Determine if workspace is large enough for blocked code.
+*
+ IF ( NT.LE.NB ) THEN
+ IWS = (LBWORK+LLWORK-NB)*NB
+ ELSE
+ IWS = (LBWORK+LLWORK)*NB+NT*NT
+ END IF
+
+ IF( LWORK.LT.IWS ) THEN
+*
+* Not enough workspace to use optimal NB: reduce NB and
+* determine the minimum value of NB.
+*
+ IF ( NT.LE.NB ) THEN
+ NB = LWORK / (LLWORK+(LBWORK-NB))
+ ELSE
+ NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+ END IF
+
+ NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
+ $ -1 ) )
+ END IF
+ END IF
+ END IF
+*
+ IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+* Use blocked code initially
+*
+ DO 10 I = 1, K - NX, NB
+ IB = MIN( K-I+1, NB )
+*
+* Update the current column using old T's
+*
+ DO 20 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:I+IB-1) from the left
+*
+ CALL DLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, IB, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ IB)
+
+20 CONTINUE
+*
+* Compute the QR factorization of the current block
+* A(I:M,I:I+IB-1)
+*
+ CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+ IF( I+IB.LE.N ) THEN
+*
+* Form the triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(I), LBWORK )
+*
+ END IF
+ 10 CONTINUE
+ ELSE
+ I = 1
+ END IF
+*
+* Use unblocked code to factor the last or only block.
+*
+ IF( I.LE.K ) THEN
+
+ IF ( I .NE. 1 ) THEN
+
+ DO 30 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:K) from the left
+*
+ CALL DLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, K-I+1, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ K-I+1)
+30 CONTINUE
+
+ CALL DGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+ ELSE
+*
+* Use unblocked code to factor the last or only block.
+*
+ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK,IINFO )
+
+ END IF
+ END IF
+
+
+*
+* Apply update to the column M+1:N when N > M
+*
+ IF ( M.LT.N .AND. I.NE.1) THEN
+*
+* Form the last triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ IF ( NT .LE. NB ) THEN
+ CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+ ELSE
+ CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+1), NT )
+ END IF
+
+*
+* Apply H' to A(1:M,M+1:N) from the left
+*
+ DO 40 J = 1, K-NX, NB
+
+ IB = MIN( K-J+1, NB )
+
+ CALL DLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, IB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+
+40 CONTINUE
+
+ IF ( NT.LE.NB ) THEN
+ CALL DLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ ELSE
+ CALL DLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA,
+ $ WORK(LBWORK*NB+1),
+ $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ END IF
+
+ END IF
+
+ WORK( 1 ) = IWS
+ RETURN
+*
+* End of DGEQRF
+*
+ END
+
diff --git a/SRC/VARIANTS/qr/LL/sceil.f b/SRC/VARIANTS/qr/LL/sceil.f
new file mode 100644
index 00000000..70c5c5cf
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/sceil.f
@@ -0,0 +1,28 @@
+ REAL FUNCTION SCEIL( A )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* June 2008
+*
+* .. Scalar Arguments ..*
+ REAL A
+* ..
+*
+* =====================================================================
+*
+* .. Intrinsic Functions ..
+ INTRINSIC INT
+* ..
+* .. Executable Statements ..*
+*
+ IF (A-INT(A).EQ.0) THEN
+ SCEIL = A
+ ELSE IF (A.GT.0) THEN
+ SCEIL = INT(A)+1;
+ ELSE
+ SCEIL = INT(A)
+ END IF
+
+ RETURN
+*
+ END
diff --git a/SRC/VARIANTS/qr/LL/sgeqrf.f b/SRC/VARIANTS/qr/LL/sgeqrf.f
new file mode 100644
index 00000000..fef6379b
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/sgeqrf.f
@@ -0,0 +1,343 @@
+ SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, LWORK, M, N
+* ..
+* .. Array Arguments ..
+ REAL A( LDA, * ), TAU( * ), WORK( * )
+* ..
+*
+* Purpose
+* =======
+*
+* SGEQRF computes a QR factorization of a real M-by-N matrix A:
+* A = Q * R.
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) REAL array, dimension (LDA,N)
+* On entry, the M-by-N matrix A.
+* On exit, the elements on and above the diagonal of the array
+* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+* upper triangular if m >= n); the elements below the diagonal,
+* with the array TAU, represent the orthogonal matrix Q as a
+* product of min(m,n) elementary reflectors (see Further
+* Details).
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* TAU (output) REAL array, dimension (min(M,N))
+* The scalar factors of the elementary reflectors (see Further
+* Details).
+*
+* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK))
+* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+* LWORK (input) INTEGER
+*
+* The dimension of the array WORK. The dimension can be divided into three parts.
+*
+* 1) The part for the triangular factor T. If the very last T is not bigger
+* than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
+* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
+*
+* 2) The part for the very last T when T is bigger than any of the rest T.
+* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+* where K = min(M,N), NX is calculated by
+* NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
+*
+* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+* So LWORK = part1 + part2 + part3
+*
+* If LWORK = -1, then a workspace query is assumed; the routine
+* only calculates the optimal size of the WORK array, returns
+* this value as the first entry of the WORK array, and no error
+* message related to LWORK is issued by XERBLA.
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+*
+* Further Details
+* ===============
+*
+* The matrix Q is represented as a product of elementary reflectors
+*
+* Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+* Each H(i) has the form
+*
+* H(i) = I - tau * v * v'
+*
+* where tau is a real scalar, and v is a real vector with
+* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+* and tau in TAU(i).
+*
+* =====================================================================
+*
+* .. Local Scalars ..
+ LOGICAL LQUERY
+ INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+ $ NBMIN, NX, LBWORK, NT, LLWORK
+* ..
+* .. External Subroutines ..
+ EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ REAL SCEIL
+ EXTERNAL ILAENV, SCEIL
+* ..
+* .. Executable Statements ..
+
+ INFO = 0
+ NBMIN = 2
+ NX = 0
+ IWS = N
+ K = MIN( M, N )
+ NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+* Determine when to cross over from blocked to unblocked code.
+*
+ NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
+ END IF
+*
+* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+* NB=3 2NB=6 K=10
+* | | |
+* 1--2--3--4--5--6--7--8--9--10
+* | \________/
+* K-NX=5 NT=4
+*
+* So here 4 x 4 is the last T stored in the workspace
+*
+ NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+* optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+ LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+ LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+ IF ( NT.GT.NB ) THEN
+
+ LBWORK = K-NT
+*
+* Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+ LWKOPT = (LBWORK+LLWORK)*NB
+ WORK( 1 ) = (LWKOPT+NT*NT)
+
+ ELSE
+
+ LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+ LWKOPT = (LBWORK+LLWORK-NB)*NB
+ WORK( 1 ) = LWKOPT
+
+ END IF
+
+*
+* Test the input arguments
+*
+ LQUERY = ( LWORK.EQ.-1 )
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+ INFO = -7
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'SGEQRF', -INFO )
+ RETURN
+ ELSE IF( LQUERY ) THEN
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( K.EQ.0 ) THEN
+ WORK( 1 ) = 1
+ RETURN
+ END IF
+*
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+ IF( NX.LT.K ) THEN
+*
+* Determine if workspace is large enough for blocked code.
+*
+ IF ( NT.LE.NB ) THEN
+ IWS = (LBWORK+LLWORK-NB)*NB
+ ELSE
+ IWS = (LBWORK+LLWORK)*NB+NT*NT
+ END IF
+
+ IF( LWORK.LT.IWS ) THEN
+*
+* Not enough workspace to use optimal NB: reduce NB and
+* determine the minimum value of NB.
+*
+ IF ( NT.LE.NB ) THEN
+ NB = LWORK / (LLWORK+(LBWORK-NB))
+ ELSE
+ NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+ END IF
+
+ NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1,
+ $ -1 ) )
+ END IF
+ END IF
+ END IF
+*
+ IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+* Use blocked code initially
+*
+ DO 10 I = 1, K - NX, NB
+ IB = MIN( K-I+1, NB )
+*
+* Update the current column using old T's
+*
+ DO 20 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:I+IB-1) from the left
+*
+ CALL SLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, IB, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ IB)
+
+20 CONTINUE
+*
+* Compute the QR factorization of the current block
+* A(I:M,I:I+IB-1)
+*
+ CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+ IF( I+IB.LE.N ) THEN
+*
+* Form the triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(I), LBWORK )
+*
+ END IF
+ 10 CONTINUE
+ ELSE
+ I = 1
+ END IF
+*
+* Use unblocked code to factor the last or only block.
+*
+ IF( I.LE.K ) THEN
+
+ IF ( I .NE. 1 ) THEN
+
+ DO 30 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:K) from the left
+*
+ CALL SLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, K-I+1, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ K-I+1)
+30 CONTINUE
+
+ CALL SGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+ ELSE
+*
+* Use unblocked code to factor the last or only block.
+*
+ CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK,IINFO )
+
+ END IF
+ END IF
+
+
+*
+* Apply update to the column M+1:N when N > M
+*
+ IF ( M.LT.N .AND. I.NE.1) THEN
+*
+* Form the last triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ IF ( NT .LE. NB ) THEN
+ CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+ ELSE
+ CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+1), NT )
+ END IF
+
+*
+* Apply H' to A(1:M,M+1:N) from the left
+*
+ DO 40 J = 1, K-NX, NB
+
+ IB = MIN( K-J+1, NB )
+
+ CALL SLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, IB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+
+40 CONTINUE
+
+ IF ( NT.LE.NB ) THEN
+ CALL SLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ ELSE
+ CALL SLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA,
+ $ WORK(LBWORK*NB+1),
+ $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ END IF
+
+ END IF
+
+ WORK( 1 ) = IWS
+ RETURN
+*
+* End of SGEQRF
+*
+ END
diff --git a/SRC/VARIANTS/qr/LL/zgeqrf.f b/SRC/VARIANTS/qr/LL/zgeqrf.f
new file mode 100644
index 00000000..cf5e093e
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/zgeqrf.f
@@ -0,0 +1,343 @@
+ SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+* -- LAPACK routine (version 3.1) --
+* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+* March 2008
+*
+* .. Scalar Arguments ..
+ INTEGER INFO, LDA, LWORK, M, N
+* ..
+* .. Array Arguments ..
+ COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * )
+* ..
+*
+* Purpose
+* =======
+*
+* ZGEQRF computes a QR factorization of a real M-by-N matrix A:
+* A = Q * R.
+*
+* This is the left-looking Level 3 BLAS version of the algorithm.
+*
+* Arguments
+* =========
+*
+* M (input) INTEGER
+* The number of rows of the matrix A. M >= 0.
+*
+* N (input) INTEGER
+* The number of columns of the matrix A. N >= 0.
+*
+* A (input/output) COMPLEX*16 array, dimension (LDA,N)
+* On entry, the M-by-N matrix A.
+* On exit, the elements on and above the diagonal of the array
+* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+* upper triangular if m >= n); the elements below the diagonal,
+* with the array TAU, represent the orthogonal matrix Q as a
+* product of min(m,n) elementary reflectors (see Further
+* Details).
+*
+* LDA (input) INTEGER
+* The leading dimension of the array A. LDA >= max(1,M).
+*
+* TAU (output) COMPLEX*16 array, dimension (min(M,N))
+* The scalar factors of the elementary reflectors (see Further
+* Details).
+*
+* WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
+* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+* LWORK (input) INTEGER
+*
+* The dimension of the array WORK. The dimension can be divided into three parts.
+*
+* 1) The part for the triangular factor T. If the very last T is not bigger
+* than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
+* NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
+*
+* 2) The part for the very last T when T is bigger than any of the rest T.
+* The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+* where K = min(M,N), NX is calculated by
+* NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
+*
+* 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+* So LWORK = part1 + part2 + part3
+*
+* If LWORK = -1, then a workspace query is assumed; the routine
+* only calculates the optimal size of the WORK array, returns
+* this value as the first entry of the WORK array, and no error
+* message related to LWORK is issued by XERBLA.
+*
+* INFO (output) INTEGER
+* = 0: successful exit
+* < 0: if INFO = -i, the i-th argument had an illegal value
+*
+* Further Details
+* ===============
+*
+* The matrix Q is represented as a product of elementary reflectors
+*
+* Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+* Each H(i) has the form
+*
+* H(i) = I - tau * v * v'
+*
+* where tau is a real scalar, and v is a real vector with
+* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+* and tau in TAU(i).
+*
+* =====================================================================
+*
+* .. Local Scalars ..
+ LOGICAL LQUERY
+ INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+ $ NBMIN, NX, LBWORK, NT, LLWORK
+* ..
+* .. External Subroutines ..
+ EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
+* ..
+* .. Intrinsic Functions ..
+ INTRINSIC MAX, MIN
+* ..
+* .. External Functions ..
+ INTEGER ILAENV
+ REAL SCEIL
+ EXTERNAL ILAENV, SCEIL
+* ..
+* .. Executable Statements ..
+
+ INFO = 0
+ NBMIN = 2
+ NX = 0
+ IWS = N
+ K = MIN( M, N )
+ NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+* Determine when to cross over from blocked to unblocked code.
+*
+ NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
+ END IF
+*
+* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+* NB=3 2NB=6 K=10
+* | | |
+* 1--2--3--4--5--6--7--8--9--10
+* | \________/
+* K-NX=5 NT=4
+*
+* So here 4 x 4 is the last T stored in the workspace
+*
+ NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+* optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+ LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+ LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+ IF ( NT.GT.NB ) THEN
+
+ LBWORK = K-NT
+*
+* Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+ LWKOPT = (LBWORK+LLWORK)*NB
+ WORK( 1 ) = (LWKOPT+NT*NT)
+
+ ELSE
+
+ LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+ LWKOPT = (LBWORK+LLWORK-NB)*NB
+ WORK( 1 ) = LWKOPT
+
+ END IF
+
+*
+* Test the input arguments
+*
+ LQUERY = ( LWORK.EQ.-1 )
+ IF( M.LT.0 ) THEN
+ INFO = -1
+ ELSE IF( N.LT.0 ) THEN
+ INFO = -2
+ ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+ INFO = -4
+ ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+ INFO = -7
+ END IF
+ IF( INFO.NE.0 ) THEN
+ CALL XERBLA( 'ZGEQRF', -INFO )
+ RETURN
+ ELSE IF( LQUERY ) THEN
+ RETURN
+ END IF
+*
+* Quick return if possible
+*
+ IF( K.EQ.0 ) THEN
+ WORK( 1 ) = 1
+ RETURN
+ END IF
+*
+ IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+ IF( NX.LT.K ) THEN
+*
+* Determine if workspace is large enough for blocked code.
+*
+ IF ( NT.LE.NB ) THEN
+ IWS = (LBWORK+LLWORK-NB)*NB
+ ELSE
+ IWS = (LBWORK+LLWORK)*NB+NT*NT
+ END IF
+
+ IF( LWORK.LT.IWS ) THEN
+*
+* Not enough workspace to use optimal NB: reduce NB and
+* determine the minimum value of NB.
+*
+ IF ( NT.LE.NB ) THEN
+ NB = LWORK / (LLWORK+(LBWORK-NB))
+ ELSE
+ NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+ END IF
+
+ NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1,
+ $ -1 ) )
+ END IF
+ END IF
+ END IF
+*
+ IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+* Use blocked code initially
+*
+ DO 10 I = 1, K - NX, NB
+ IB = MIN( K-I+1, NB )
+*
+* Update the current column using old T's
+*
+ DO 20 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:I+IB-1) from the left
+*
+ CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, IB, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ IB)
+
+20 CONTINUE
+*
+* Compute the QR factorization of the current block
+* A(I:M,I:I+IB-1)
+*
+ CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+ IF( I+IB.LE.N ) THEN
+*
+* Form the triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(I), LBWORK )
+*
+ END IF
+ 10 CONTINUE
+ ELSE
+ I = 1
+ END IF
+*
+* Use unblocked code to factor the last or only block.
+*
+ IF( I.LE.K ) THEN
+
+ IF ( I .NE. 1 ) THEN
+
+ DO 30 J = 1, I - NB, NB
+*
+* Apply H' to A(J:M,I:K) from the left
+*
+ CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, K-I+1, NB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ K-I+1)
+30 CONTINUE
+
+ CALL ZGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+ ELSE
+*
+* Use unblocked code to factor the last or only block.
+*
+ CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
+ $ WORK,IINFO )
+
+ END IF
+ END IF
+
+
+*
+* Apply update to the column M+1:N when N > M
+*
+ IF ( M.LT.N .AND. I.NE.1) THEN
+*
+* Form the last triangular factor of the block reflector
+* H = H(i) H(i+1) . . . H(i+ib-1)
+*
+ IF ( NT .LE. NB ) THEN
+ CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+ ELSE
+ CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+ $ A( I, I ), LDA, TAU( I ),
+ $ WORK(LBWORK*NB+1), NT )
+ END IF
+
+*
+* Apply H' to A(1:M,M+1:N) from the left
+*
+ DO 40 J = 1, K-NX, NB
+
+ IB = MIN( K-J+1, NB )
+
+ CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, IB,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+
+40 CONTINUE
+
+ IF ( NT.LE.NB ) THEN
+ CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA, WORK(J), LBWORK,
+ $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ ELSE
+ CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+ $ 'Columnwise', M-J+1, N-M, K-J+1,
+ $ A( J, J ), LDA,
+ $ WORK(LBWORK*NB+1),
+ $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+ $ N-M)
+ END IF
+
+ END IF
+
+ WORK( 1 ) = IWS
+ RETURN
+*
+* End of ZGEQRF
+*
+ END