Move LAPACK trunk into position.

author: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
committer: jason <jason@8a072113-8704-0410-8d35-dd094bca7971> 2008-10-28 01:38:50 +0000
commit: baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree: 8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/VARIANTS
27 files changed, 5330 insertions, 0 deletions
diff --git a/SRC/VARIANTS/Makefile b/SRC/VARIANTS/Makefile
new file mode 100644
index 00000000..42446eb5
--- /dev/null
+++ b/SRC/VARIANTS/Makefile
@@ -0,0 +1,67 @@
+include ../../make.inc
+
+#######################################################################
+#  This is the makefile to create a the variants libraries for LAPACK.
+#  The files are organized as follows:
+#       CHOLRL -- Right looking block version of the algorithm, calling Level 3 BLAS
+#       CHOLTOP -- Top looking block version of the algorithm, calling Level 3 BLAS
+#       LUCR -- Crout Level 3 BLAS version of LU factorization
+#       LULL -- left-looking Level 3 BLAS version of LU factorization
+#       QRLL -- left-looking Level 3 BLAS version of QR factorization
+#       LUREC -- an iterative version of Sivan Toledo's recursive LU algorithm[1].  
+#       For square matrices, this iterative versions should
+#       be within a factor of two of the optimum number of memory transfers.
+#
+# [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+#  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+#  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+#######################################################################
+
+VARIANTSDIR=LIB
+
+CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o
+
+CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o
+
+LUCR = lu/CR/cgetrf.o lu/CR/dgetrf.o lu/CR/sgetrf.o lu/CR/zgetrf.o
+
+LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o
+
+LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
+
+QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o  qr/LL/sceil.o
+
+
+all: cholrl choltop lucr lull lurec qrll
+
+cholrl: $(CHOLRL)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL)
+	$(RANLIB) $(VARIANTSDIR)/cholrl.a
+
+choltop: $(CHOLTOP)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP)
+	$(RANLIB) $(VARIANTSDIR)/choltop.a
+
+lucr: $(LUCR)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR)
+	$(RANLIB) $(VARIANTSDIR)/lucr.a
+
+lull: $(LULL)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL)
+	$(RANLIB) $(VARIANTSDIR)/lull.a
+
+lurec: $(LUREC)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC)
+	$(RANLIB) $(VARIANTSDIR)/lurec.a
+	
+qrll: $(QRLL)
+	$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a  $(QRLL)
+	$(RANLIB) $(VARIANTSDIR)/qrll.a
+
+
+.f.o: 
+	$(FORTRAN) $(OPTS) -c $< -o $@
+	
+clean:
+	rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) \
+	      $(VARIANTSDIR)/*.a
+\ No newline at end of file
diff --git a/SRC/VARIANTS/README b/SRC/VARIANTS/README
new file mode 100644
index 00000000..6b4f3258
--- /dev/null
+++ b/SRC/VARIANTS/README
@@ -0,0 +1,84 @@
+   		   ===============
+ 		   = README File =
+		   ===============
+
+This README File is for the LAPACK driver variants.
+It is composed of 5 sections:
+	- Description: contents a quick description of each of the variants. For a more detailed description please refer to LAWN XXX.
+	- Build
+	- Testing
+	- Linking your program
+	- Support
+	
+Author: Julie LANGOU, May 2008
+
+===============
+= DESCRIPTION =
+===============
+
+This directory contains several variants of LAPACK routines in single/double/complex/double complex precision:
+	- [sdcz]getrf with LU Crout Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/CR
+	- [sdcz]getrf with LU Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/LL
+	- [sdcz]getrf with Sivan Toledo's recursive LU algorithm [1] - Directory: SRC/VARIANTS/lu/REC
+	- [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
+	- [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
+	- [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
+	
+References:For a more detailed description please refer to
+	- [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+          1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+	- [2]LAWN XXX
+	
+=========
+= BUILD =
+=========
+	
+These variants are compiled by default in the build process but they are not tested by default.
+The build process creates one new library per variants in the four arithmetics (singel/double/comple/double complex).
+The libraries are in the SRC/VARIANTS/LIB directory.
+
+Corresponding libraries created in SRC/VARIANTS/LIB:
+	- LU Crout : lucr.a
+	- LU Left Looking : lull.a
+	- LU Sivan Toledo's recursive : lurec.a
+	- QR Left Looking : qrll.a
+	- Cholesky Right Looking : cholrl.a
+	- Cholesky Top : choltop.a
+	
+
+===========
+= TESTING =
+===========
+
+To test these variants you can type 'make variants-testing'
+This will rerun the linear methods testings once per variants and append the short name of the variants to the output files.
+You should then see the following files in the TESTING directory:
+[scdz]test_cholrl.out
+[scdz]test_choltop.out
+[scdz]test_lucr.out
+[scdz]test_lull.out
+[scdz]test_lurec.out
+[scdz]test_qrll.out
+
+========================
+= LINKING YOUR PROGRAM =
+========================
+
+You just need to add the variants methods library in your linking sequence before your lapack libary.
+Here is a quick example for LU
+
+Default using LU Right Looking version:
+ $(FORTRAN) -c myprog.f
+ $(FORTRAN) -o myexe myprog.o $(LAPACKLIB) $(BLASLIB)
+
+Using LU Left Looking version:
+ $(FORTRAN) -c myprog.f
+ $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS/LIB)/lull.a $(LAPACKLIB) $(BLASLIB)
+
+===========
+= SUPPORT =
+===========
+
+You can use either LAPACK forum or the LAPACK mailing list to get support.
+LAPACK forum : http://icl.cs.utk.edu/lapack-forum
+LAPACK mailing list : lapack@cs.utk.edu
diff --git a/SRC/VARIANTS/cholesky/RL/cpotrf.f b/SRC/VARIANTS/cholesky/RL/cpotrf.f
new file mode 100644
index 00000000..a6d194cb
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/cpotrf.f
@@ -0,0 +1,187 @@
+      SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTRF computes the Cholesky factorization of a real Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      COMPLEX            CONE
+      PARAMETER          ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CPOTF2, CHERK, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                 Updating the trailing submatrix.
+*
+                  CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose', 
+     $                        'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+     $                        LDA, A( J, J+JB ), LDA )
+                  CALL CHERK( 'Upper', 'Conjugate transpose', N-J-JB+1, 
+     $                        JB, -ONE, A( J, J+JB ), LDA, 
+     $                        ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                Updating the trailing submatrix.
+*
+                 CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose', 
+     $                       'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+     $                       LDA, A( J+JB, J ), LDA )
+
+                 CALL CHERK( 'Lower', 'No Transpose', N-J-JB+1, JB, 
+     $                       -ONE, A( J+JB, J ), LDA, 
+     $                       ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/RL/dpotrf.f b/SRC/VARIANTS/cholesky/RL/dpotrf.f
new file mode 100644
index 00000000..72603304
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/dpotrf.f
@@ -0,0 +1,186 @@
+      SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                 Updating the trailing submatrix.
+*
+                  CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                        JB, N-J-JB+1, ONE, A( J, J ), LDA,
+     $                        A( J, J+JB ), LDA )
+                  CALL DSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE,
+     $                        A( J, J+JB ), LDA, 
+     $                        ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                Updating the trailing submatrix.
+*
+                 CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                       N-J-JB+1, JB, ONE, A( J, J ), LDA,
+     $                       A( J+JB, J ), LDA )
+
+                 CALL DSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB, 
+     $                       -ONE, A( J+JB, J ), LDA, 
+     $                       ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/RL/spotrf.f b/SRC/VARIANTS/cholesky/RL/spotrf.f
new file mode 100644
index 00000000..3375902a
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/spotrf.f
@@ -0,0 +1,186 @@
+      SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SPOTF2, SSYRK, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                 Updating the trailing submatrix.
+*
+                  CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                        JB, N-J-JB+1, ONE, A( J, J ), LDA,
+     $                        A( J, J+JB ), LDA )
+                  CALL SSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE,
+     $                        A( J, J+JB ), LDA, 
+     $                        ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                Updating the trailing submatrix.
+*
+                 CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                       N-J-JB+1, JB, ONE, A( J, J ), LDA,
+     $                       A( J+JB, J ), LDA )
+
+                 CALL SSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB, 
+     $                       -ONE, A( J+JB, J ), LDA, 
+     $                       ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/RL/zpotrf.f b/SRC/VARIANTS/cholesky/RL/zpotrf.f
new file mode 100644
index 00000000..b2bce7f6
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/RL/zpotrf.f
@@ -0,0 +1,187 @@
+      SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTRF computes the Cholesky factorization of a real Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the right looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      COMPLEX*16         CONE
+      PARAMETER          ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                 Updating the trailing submatrix.
+*
+                  CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose', 
+     $                        'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+     $                        LDA, A( J, J+JB ), LDA )
+                  CALL ZHERK( 'Upper', 'Conjugate transpose', N-J-JB+1, 
+     $                        JB, -ONE, A( J, J+JB ), LDA, 
+     $                        ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+
+               CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+               IF( J+JB.LE.N ) THEN
+*
+*                Updating the trailing submatrix.
+*
+                 CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose', 
+     $                       'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+     $                       LDA, A( J+JB, J ), LDA )
+
+                 CALL ZHERK( 'Lower', 'No Transpose', N-J-JB+1, JB, 
+     $                       -ONE, A( J+JB, J ), LDA, 
+     $                       ONE, A( J+JB, J+JB ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/TOP/cpotrf.f b/SRC/VARIANTS/cholesky/TOP/cpotrf.f
new file mode 100644
index 00000000..54ae1bb9
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/cpotrf.f
@@ -0,0 +1,181 @@
+      SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      COMPLEX            CONE
+      PARAMETER          ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CPOTF2, CHERK, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose', 
+     $                      'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA,
+     $                      A( 1, J ), LDA )
+
+               CALL CHERK( 'Upper', 'Conjugate Transpose', JB, J-1, 
+     $                      -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose', 
+     $                     'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA,
+     $                     A( J, 1 ), LDA )
+
+               CALL CHERK( 'Lower', 'No Transpose', JB, J-1, 
+     $                     -ONE, A( J, 1 ), LDA, 
+     $                     ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/TOP/dpotrf.f b/SRC/VARIANTS/cholesky/TOP/dpotrf.f
new file mode 100644
index 00000000..8dd2c45b
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/dpotrf.f
@@ -0,0 +1,182 @@
+      SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                      J-1, JB, ONE, A( 1, 1 ), LDA,
+     $                      A( 1, J ), LDA )
+
+               CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+     $                      A( 1, J ), LDA, 
+     $                      ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                     JB, J-1, ONE, A( 1, 1 ), LDA,
+     $                     A( J, 1 ), LDA )
+
+               CALL DSYRK( 'Lower', 'No Transpose', JB, J-1, 
+     $                     -ONE, A( J, 1 ), LDA, 
+     $                     ONE, A( J, J ), LDA )
+               
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/TOP/spotrf.f b/SRC/VARIANTS/cholesky/TOP/spotrf.f
new file mode 100644
index 00000000..08b41b48
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/spotrf.f
@@ -0,0 +1,181 @@
+      SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SPOTF2, SSYRK, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                      J-1, JB, ONE, A( 1, 1 ), LDA,
+     $                      A( 1, J ), LDA )
+
+               CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+     $                      A( 1, J ), LDA, 
+     $                      ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                     JB, J-1, ONE, A( 1, 1 ), LDA,
+     $                     A( J, 1 ), LDA )
+
+               CALL SSYRK( 'Lower', 'No Transpose', JB, J-1, 
+     $                     -ONE, A( J, 1 ), LDA, 
+     $                     ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/cholesky/TOP/zpotrf.f b/SRC/VARIANTS/cholesky/TOP/zpotrf.f
new file mode 100644
index 00000000..4eae3d1b
--- /dev/null
+++ b/SRC/VARIANTS/cholesky/TOP/zpotrf.f
@@ -0,0 +1,181 @@
+      SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the top-looking block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      COMPLEX*16         CONE
+      PARAMETER          ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose', 
+     $                      'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA,
+     $                      A( 1, J ), LDA )
+
+               CALL ZHERK( 'Upper', 'Conjugate Transpose', JB, J-1, 
+     $                      -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute the current block.
+*
+               CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose', 
+     $                     'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA,
+     $                     A( J, 1 ), LDA )
+
+               CALL ZHERK( 'Lower', 'No Transpose', JB, J-1, 
+     $                     -ONE, A( J, 1 ), LDA, 
+     $                     ONE, A( J, J ), LDA )
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZPOTRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/CR/cgetrf.f b/SRC/VARIANTS/lu/CR/cgetrf.f
new file mode 100644
index 00000000..7d6403e1
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/cgetrf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the Crout Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGETF2, CLASWP, CTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Update current block.
+*
+            CALL CGEMM( 'No transpose', 'No transpose', 
+     $                 M-J+1, JB, J-1, -ONE, 
+     $                 A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+     $                 A( J, J ), LDA )
+            
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*            
+*           Apply interchanges to column 1:J-1            
+*
+            CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF ( J+JB.LE.N ) THEN
+*            
+*              Apply interchanges to column J+JB:N            
+*
+               CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, 
+     $                     IPIV, 1 )
+*               
+               CALL CGEMM( 'No transpose', 'No transpose', 
+     $                    JB, N-J-JB+1, J-1, -ONE, 
+     $                    A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+     $                    A( J, J+JB ), LDA )
+*
+*              Compute block row of U.
+*
+               CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-J-JB+1, ONE, A( J, J ), LDA, 
+     $                    A( J, J+JB ), LDA )
+            END IF
+
+   20    CONTINUE
+
+      END IF
+      RETURN
+*
+*     End of CGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/CR/dgetrf.f b/SRC/VARIANTS/lu/CR/dgetrf.f
new file mode 100644
index 00000000..e1b4121e
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/dgetrf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the Crout Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Update current block.
+*
+            CALL DGEMM( 'No transpose', 'No transpose', 
+     $                 M-J+1, JB, J-1, -ONE, 
+     $                 A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+     $                 A( J, J ), LDA )
+            
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*            
+*           Apply interchanges to column 1:J-1            
+*
+            CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF ( J+JB.LE.N ) THEN
+*            
+*              Apply interchanges to column J+JB:N            
+*
+               CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, 
+     $                     IPIV, 1 )
+*               
+               CALL DGEMM( 'No transpose', 'No transpose', 
+     $                    JB, N-J-JB+1, J-1, -ONE, 
+     $                    A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+     $                    A( J, J+JB ), LDA )
+*
+*              Compute block row of U.
+*
+               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-J-JB+1, ONE, A( J, J ), LDA, 
+     $                    A( J, J+JB ), LDA )
+            END IF
+
+   20    CONTINUE
+
+      END IF
+      RETURN
+*
+*     End of DGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/CR/sgetrf.f b/SRC/VARIANTS/lu/CR/sgetrf.f
new file mode 100644
index 00000000..238ec119
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/sgetrf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the Crout Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGETF2, SLASWP, STRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Update current block.
+*
+            CALL SGEMM( 'No transpose', 'No transpose', 
+     $                 M-J+1, JB, J-1, -ONE, 
+     $                 A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+     $                 A( J, J ), LDA )
+            
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*            
+*           Apply interchanges to column 1:J-1            
+*
+            CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF ( J+JB.LE.N ) THEN
+*            
+*              Apply interchanges to column J+JB:N            
+*
+               CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, 
+     $                     IPIV, 1 )
+*               
+               CALL SGEMM( 'No transpose', 'No transpose', 
+     $                    JB, N-J-JB+1, J-1, -ONE, 
+     $                    A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+     $                    A( J, J+JB ), LDA )
+*
+*              Compute block row of U.
+*
+               CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-J-JB+1, ONE, A( J, J ), LDA, 
+     $                    A( J, J+JB ), LDA )
+            END IF
+
+   20    CONTINUE
+
+      END IF
+      RETURN
+*
+*     End of SGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/CR/zgetrf.f b/SRC/VARIANTS/lu/CR/zgetrf.f
new file mode 100644
index 00000000..2dafefbf
--- /dev/null
+++ b/SRC/VARIANTS/lu/CR/zgetrf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the Crout Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Update current block.
+*
+            CALL ZGEMM( 'No transpose', 'No transpose', 
+     $                 M-J+1, JB, J-1, -ONE, 
+     $                 A( J, 1 ), LDA, A( 1, J ), LDA, ONE,
+     $                 A( J, J ), LDA )
+            
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*            
+*           Apply interchanges to column 1:J-1            
+*
+            CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF ( J+JB.LE.N ) THEN
+*            
+*              Apply interchanges to column J+JB:N            
+*
+               CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, 
+     $                     IPIV, 1 )
+*               
+               CALL ZGEMM( 'No transpose', 'No transpose', 
+     $                    JB, N-J-JB+1, J-1, -ONE, 
+     $                    A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE,
+     $                    A( J, J+JB ), LDA )
+*
+*              Compute block row of U.
+*
+               CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-J-JB+1, ONE, A( J, J ), LDA, 
+     $                    A( J, J+JB ), LDA )
+            END IF
+
+   20    CONTINUE
+
+      END IF
+      RETURN
+*
+*     End of ZGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/LL/cgetrf.f b/SRC/VARIANTS/lu/LL/cgetrf.f
new file mode 100644
index 00000000..189362b0
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/cgetrf.f
@@ -0,0 +1,190 @@
+      SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = (1.0E+0, 0.0E+0) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, K, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGETF2, CLASWP, CTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CGETF2( M, N, A, LDA, IPIV, INFO )
+
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+*           Update before factoring the current panel
+*
+            DO 30 K = 1, J-NB, NB
+*            
+*              Apply interchanges to rows K:K+NB-1.
+* 
+               CALL CLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    NB, JB, ONE, A( K, K ), LDA, 
+     $                    A( K, J ), LDA )
+*
+*              Update trailing submatrix.
+*
+               CALL CGEMM( 'No transpose', 'No transpose', 
+     $                    M-K-NB+1, JB, NB, -ONE, 
+     $                    A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+     $                    A( K+NB, J ), LDA )
+   30       CONTINUE
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+   20    CONTINUE
+
+*
+*        Apply interchanges to the left-overs
+*
+         DO 40 K = 1, MIN( M, N ), NB
+            CALL CLASWP( K-1, A( 1, 1 ), LDA, K, 
+     $                  MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+   40    CONTINUE 
+*
+*        Apply update to the M+1:N columns when N > M
+*
+         IF ( N.GT.M ) THEN
+
+            CALL CLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+            DO 50 K = 1, M, NB
+
+               JB = MIN( M-K+1, NB )
+*
+               CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-M, ONE, A( K, K ), LDA, 
+     $                    A( K, M+1 ), LDA )
+
+*     
+               IF ( K+NB.LE.M ) THEN
+                    CALL CGEMM( 'No transpose', 'No transpose', 
+     $                         M-K-NB+1, N-M, NB, -ONE, 
+     $                         A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+     $                        A( K+NB, M+1 ), LDA )
+               END IF
+   50       CONTINUE  
+         END IF
+*
+      END IF
+      RETURN
+*
+*     End of CGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/LL/dgetrf.f b/SRC/VARIANTS/lu/LL/dgetrf.f
new file mode 100644
index 00000000..8231805c
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/dgetrf.f
@@ -0,0 +1,189 @@
+      SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, K, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Update before factoring the current panel
+*
+            DO 30 K = 1, J-NB, NB
+*            
+*              Apply interchanges to rows K:K+NB-1.
+* 
+               CALL DLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    NB, JB, ONE, A( K, K ), LDA, 
+     $                    A( K, J ), LDA )
+*
+*              Update trailing submatrix.
+*
+               CALL DGEMM( 'No transpose', 'No transpose', 
+     $                    M-K-NB+1, JB, NB, -ONE, 
+     $                    A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+     $                    A( K+NB, J ), LDA )
+   30       CONTINUE
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+   20    CONTINUE
+
+*
+*        Apply interchanges to the left-overs
+*
+         DO 40 K = 1, MIN( M, N ), NB
+            CALL DLASWP( K-1, A( 1, 1 ), LDA, K, 
+     $                  MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+   40    CONTINUE 
+*
+*        Apply update to the M+1:N columns when N > M
+*
+         IF ( N.GT.M ) THEN
+
+            CALL DLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+            DO 50 K = 1, M, NB
+
+               JB = MIN( M-K+1, NB )
+*
+               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-M, ONE, A( K, K ), LDA, 
+     $                    A( K, M+1 ), LDA )
+
+*     
+               IF ( K+NB.LE.M ) THEN
+                    CALL DGEMM( 'No transpose', 'No transpose', 
+     $                         M-K-NB+1, N-M, NB, -ONE, 
+     $                         A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+     $                        A( K+NB, M+1 ), LDA )
+               END IF
+   50       CONTINUE  
+         END IF
+*
+      END IF
+      RETURN
+*
+*     End of DGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/LL/sgetrf.f b/SRC/VARIANTS/lu/LL/sgetrf.f
new file mode 100644
index 00000000..856c1a7e
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/sgetrf.f
@@ -0,0 +1,190 @@
+      SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, K, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGETF2, SLASWP, STRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SGETF2( M, N, A, LDA, IPIV, INFO )
+
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+*           Update before factoring the current panel
+*
+            DO 30 K = 1, J-NB, NB
+*            
+*              Apply interchanges to rows K:K+NB-1.
+* 
+               CALL SLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    NB, JB, ONE, A( K, K ), LDA, 
+     $                    A( K, J ), LDA )
+*
+*              Update trailing submatrix.
+*
+               CALL SGEMM( 'No transpose', 'No transpose', 
+     $                    M-K-NB+1, JB, NB, -ONE, 
+     $                    A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+     $                    A( K+NB, J ), LDA )
+   30       CONTINUE
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+   20    CONTINUE
+
+*
+*        Apply interchanges to the left-overs
+*
+         DO 40 K = 1, MIN( M, N ), NB
+            CALL SLASWP( K-1, A( 1, 1 ), LDA, K, 
+     $                  MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+   40    CONTINUE 
+*
+*        Apply update to the M+1:N columns when N > M
+*
+         IF ( N.GT.M ) THEN
+
+            CALL SLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+            DO 50 K = 1, M, NB
+
+               JB = MIN( M-K+1, NB )
+*
+               CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-M, ONE, A( K, K ), LDA, 
+     $                    A( K, M+1 ), LDA )
+
+*     
+               IF ( K+NB.LE.M ) THEN
+                    CALL SGEMM( 'No transpose', 'No transpose', 
+     $                         M-K-NB+1, N-M, NB, -ONE, 
+     $                         A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+     $                        A( K+NB, M+1 ), LDA )
+               END IF
+   50       CONTINUE  
+         END IF
+*
+      END IF
+      RETURN
+*
+*     End of SGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/LL/zgetrf.f b/SRC/VARIANTS/lu/LL/zgetrf.f
new file mode 100644
index 00000000..a6f9c0ff
--- /dev/null
+++ b/SRC/VARIANTS/lu/LL/zgetrf.f
@@ -0,0 +1,190 @@
+      SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO)
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = (1.0D+0, 0.0D+0) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, K, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZGETF2( M, N, A, LDA, IPIV, INFO )
+
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*
+*           Update before factoring the current panel
+*
+            DO 30 K = 1, J-NB, NB
+*            
+*              Apply interchanges to rows K:K+NB-1.
+* 
+               CALL ZLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    NB, JB, ONE, A( K, K ), LDA, 
+     $                    A( K, J ), LDA )
+*
+*              Update trailing submatrix.
+*
+               CALL ZGEMM( 'No transpose', 'No transpose', 
+     $                    M-K-NB+1, JB, NB, -ONE, 
+     $                    A( K+NB, K ), LDA, A( K, J ), LDA, ONE,
+     $                    A( K+NB, J ), LDA )
+   30       CONTINUE
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+   20    CONTINUE
+
+*
+*        Apply interchanges to the left-overs
+*
+         DO 40 K = 1, MIN( M, N ), NB
+            CALL ZLASWP( K-1, A( 1, 1 ), LDA, K, 
+     $                  MIN (K+NB-1, MIN ( M, N )), IPIV, 1 )
+   40    CONTINUE 
+*
+*        Apply update to the M+1:N columns when N > M
+*
+         IF ( N.GT.M ) THEN
+
+            CALL ZLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 )
+
+            DO 50 K = 1, M, NB
+
+               JB = MIN( M-K+1, NB )
+*
+               CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                    JB, N-M, ONE, A( K, K ), LDA, 
+     $                    A( K, M+1 ), LDA )
+
+*     
+               IF ( K+NB.LE.M ) THEN
+                    CALL ZGEMM( 'No transpose', 'No transpose', 
+     $                         M-K-NB+1, N-M, NB, -ONE, 
+     $                         A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE,
+     $                        A( K+NB, M+1 ), LDA )
+               END IF
+   50       CONTINUE  
+         END IF
+*
+      END IF
+      RETURN
+*
+*     End of ZGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/REC/cgetrf.f b/SRC/VARIANTS/lu/REC/cgetrf.f
new file mode 100644
index 00000000..d8a8a90b
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/cgetrf.f
@@ -0,0 +1,224 @@
+      SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK routine (version 3.X) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     May 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This code implements an iterative version of Sivan Toledo's recursive
+*  LU algorithm[1].  For square matrices, this iterative versions should
+*  be within a factor of two of the optimum number of memory transfers.
+*
+*  The pattern is as follows, with the large blocks of U being updated
+*  in one call to DTRSM, and the dotted lines denoting sections that
+*  have had all pending permutations applied:
+*
+*   1 2 3 4 5 6 7 8
+*  +-+-+---+-------+------
+*  | |1|   |       |
+*  |.+-+ 2 |       |
+*  | | |   |       |
+*  |.|.+-+-+   4   |
+*  | | | |1|       |
+*  | | |.+-+       |
+*  | | | | |       |
+*  |.|.|.|.+-+-+---+  8
+*  | | | | | |1|   |
+*  | | | | |.+-+ 2 |
+*  | | | | | | |   |
+*  | | | | |.|.+-+-+
+*  | | | | | | | |1|
+*  | | | | | | |.+-+
+*  | | | | | | | | |
+*  |.|.|.|.|.|.|.|.+-----
+*  | | | | | | | | |
+*
+*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+*  the binary expansion of the current column.  Each Schur update is
+*  applied as soon as the necessary portion of U is available.
+*
+*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, NEGONE
+      REAL               ZERO
+      PARAMETER          ( ONE = (1.0E+0, 0.0E+0) )
+      PARAMETER          ( NEGONE = (-1.0E+0, 0.0E+0) )
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               SFMIN, PIVMAG
+      COMPLEX            TMP
+      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      INTEGER            ICAMAX
+      LOGICAL            SISNAN
+      EXTERNAL           SLAMCH, ICAMAX, SISNAN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTRSM, CSCAL, XERBLA, CLASWP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, IAND, ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Compute machine safe minimum
+*
+      SFMIN = SLAMCH( 'S' )
+*
+      NSTEP = MIN( M, N )
+      DO J = 1, NSTEP
+         KAHEAD = IAND( J, -J )
+         KSTART = J + 1 - KAHEAD
+         KCOLS = MIN( KAHEAD, M-J )
+*
+*        Find pivot.
+*
+         JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+
+!        Permute just this column.
+         IF (JP .NE. J) THEN
+            TMP = A( J, J )
+            A( J, J ) = A( JP, J )
+            A( JP, J ) = TMP
+         END IF
+
+!        Apply pending permutations to L
+         NTOPIV = 1
+         IPIVSTART = J
+         JPIVSTART = J - NTOPIV
+         DO WHILE ( NTOPIV .LT. KAHEAD )
+            CALL CLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+     $           IPIV, 1 )
+            IPIVSTART = IPIVSTART - NTOPIV;
+            NTOPIV = NTOPIV * 2;
+            JPIVSTART = JPIVSTART - NTOPIV;
+         END DO
+
+!        Permute U block to match L
+         CALL CLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+!        Factor the current column
+         PIVMAG = ABS( A( J, J ) )
+         IF( PIVMAG.NE.ZERO .AND. .NOT.SISNAN( PIVMAG ) ) THEN
+               IF( PIVMAG .GE. SFMIN ) THEN
+                  CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+               ELSE
+                 DO I = 1, M-J
+                    A( J+I, J ) = A( J+I, J ) / A( J, J )
+                 END DO
+               END IF
+         ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+            INFO = J
+         END IF
+
+!        Solve for U block.
+         CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+     $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA )
+!        Schur complement.
+         CALL CGEMM( 'No transpose', 'No transpose', M-J,
+     $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+      END DO
+
+!     Handle pivot permutations on the way out of the recursion
+      NPIVED = IAND( NSTEP, -NSTEP )
+      J = NSTEP - NPIVED
+      DO WHILE ( J .GT. 0 )
+         NTOPIV = IAND( J, -J )
+         CALL CLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+     $        IPIV, 1 )
+         J = J - NTOPIV
+      END DO
+
+!     If short and wide, handle the rest of the columns.
+      IF ( M .LT. N ) THEN
+         CALL CLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+         CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+     $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+      END IF
+
+      RETURN
+*
+*     End of CGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/REC/dgetrf.f b/SRC/VARIANTS/lu/REC/dgetrf.f
new file mode 100644
index 00000000..f8c2caf1
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/dgetrf.f
@@ -0,0 +1,220 @@
+      SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK routine (version 3.X) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     May 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This code implements an iterative version of Sivan Toledo's recursive
+*  LU algorithm[1].  For square matrices, this iterative versions should
+*  be within a factor of two of the optimum number of memory transfers.
+*
+*  The pattern is as follows, with the large blocks of U being updated
+*  in one call to DTRSM, and the dotted lines denoting sections that
+*  have had all pending permutations applied:
+*
+*   1 2 3 4 5 6 7 8
+*  +-+-+---+-------+------
+*  | |1|   |       |
+*  |.+-+ 2 |       |
+*  | | |   |       |
+*  |.|.+-+-+   4   |
+*  | | | |1|       |
+*  | | |.+-+       |
+*  | | | | |       |
+*  |.|.|.|.+-+-+---+  8
+*  | | | | | |1|   |
+*  | | | | |.+-+ 2 |
+*  | | | | | | |   |
+*  | | | | |.|.+-+-+
+*  | | | | | | | |1|
+*  | | | | | | |.+-+
+*  | | | | | | | | |
+*  |.|.|.|.|.|.|.|.+-----
+*  | | | | | | | | |
+*
+*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+*  the binary expansion of the current column.  Each Schur update is
+*  applied as soon as the necessary portion of U is available.
+*
+*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+      PARAMETER          ( NEGONE = -1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   SFMIN, TMP
+      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      INTEGER            IDAMAX
+      LOGICAL            DISNAN
+      EXTERNAL           DLAMCH, IDAMAX, DISNAN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTRSM, DSCAL, XERBLA, DLASWP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, IAND
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Compute machine safe minimum
+*
+      SFMIN = DLAMCH( 'S' )
+*
+      NSTEP = MIN( M, N )
+      DO J = 1, NSTEP
+         KAHEAD = IAND( J, -J )
+         KSTART = J + 1 - KAHEAD
+         KCOLS = MIN( KAHEAD, M-J )
+*
+*        Find pivot.
+*
+         JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+
+!        Permute just this column.
+         IF (JP .NE. J) THEN
+            TMP = A( J, J )
+            A( J, J ) = A( JP, J )
+            A( JP, J ) = TMP
+         END IF
+
+!        Apply pending permutations to L
+         NTOPIV = 1
+         IPIVSTART = J
+         JPIVSTART = J - NTOPIV
+         DO WHILE ( NTOPIV .LT. KAHEAD )
+            CALL DLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+     $           IPIV, 1 )
+            IPIVSTART = IPIVSTART - NTOPIV;
+            NTOPIV = NTOPIV * 2;
+            JPIVSTART = JPIVSTART - NTOPIV;
+         END DO
+
+!        Permute U block to match L
+         CALL DLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+!        Factor the current column
+         IF( A( J, J ).NE.ZERO .AND. .NOT.DISNAN( A( J, J ) ) ) THEN
+               IF( ABS(A( J, J )) .GE. SFMIN ) THEN
+                  CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+               ELSE
+                 DO I = 1, M-J
+                    A( J+I, J ) = A( J+I, J ) / A( J, J )
+                 END DO
+               END IF
+         ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+            INFO = J
+         END IF
+
+!        Solve for U block.
+         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+     $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA )
+!        Schur complement.
+         CALL DGEMM( 'No transpose', 'No transpose', M-J,
+     $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+      END DO
+
+!     Handle pivot permutations on the way out of the recursion
+      NPIVED = IAND( NSTEP, -NSTEP )
+      J = NSTEP - NPIVED
+      DO WHILE ( J .GT. 0 )
+         NTOPIV = IAND( J, -J )
+         CALL DLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+     $        IPIV, 1 )
+         J = J - NTOPIV
+      END DO
+
+!     If short and wide, handle the rest of the columns.
+      IF ( M .LT. N ) THEN
+         CALL DLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+     $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+      END IF
+
+      RETURN
+*
+*     End of DGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/REC/sgetrf.f b/SRC/VARIANTS/lu/REC/sgetrf.f
new file mode 100644
index 00000000..1890f987
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/sgetrf.f
@@ -0,0 +1,220 @@
+      SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK routine (version 3.X) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     May 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This code implements an iterative version of Sivan Toledo's recursive
+*  LU algorithm[1].  For square matrices, this iterative versions should
+*  be within a factor of two of the optimum number of memory transfers.
+*
+*  The pattern is as follows, with the large blocks of U being updated
+*  in one call to STRSM, and the dotted lines denoting sections that
+*  have had all pending permutations applied:
+*
+*   1 2 3 4 5 6 7 8
+*  +-+-+---+-------+------
+*  | |1|   |       |
+*  |.+-+ 2 |       |
+*  | | |   |       |
+*  |.|.+-+-+   4   |
+*  | | | |1|       |
+*  | | |.+-+       |
+*  | | | | |       |
+*  |.|.|.|.+-+-+---+  8
+*  | | | | | |1|   |
+*  | | | | |.+-+ 2 |
+*  | | | | | | |   |
+*  | | | | |.|.+-+-+
+*  | | | | | | | |1|
+*  | | | | | | |.+-+
+*  | | | | | | | | |
+*  |.|.|.|.|.|.|.|.+-----
+*  | | | | | | | | |
+*
+*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+*  the binary expansion of the current column.  Each Schur update is
+*  applied as soon as the necessary portion of U is available.
+*
+*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+      PARAMETER          ( NEGONE = -1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               SFMIN, TMP
+      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      INTEGER            ISAMAX
+      LOGICAL            SISNAN
+      EXTERNAL           SLAMCH, ISAMAX, SISNAN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STRSM, SSCAL, XERBLA, SLASWP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, IAND
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Compute machine safe minimum
+*
+      SFMIN = SLAMCH( 'S' )
+*
+      NSTEP = MIN( M, N )
+      DO J = 1, NSTEP
+         KAHEAD = IAND( J, -J )
+         KSTART = J + 1 - KAHEAD
+         KCOLS = MIN( KAHEAD, M-J )
+*
+*        Find pivot.
+*
+         JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+
+!        Permute just this column.
+         IF (JP .NE. J) THEN
+            TMP = A( J, J )
+            A( J, J ) = A( JP, J )
+            A( JP, J ) = TMP
+         END IF
+
+!        Apply pending permutations to L
+         NTOPIV = 1
+         IPIVSTART = J
+         JPIVSTART = J - NTOPIV
+         DO WHILE ( NTOPIV .LT. KAHEAD )
+            CALL SLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+     $           IPIV, 1 )
+            IPIVSTART = IPIVSTART - NTOPIV;
+            NTOPIV = NTOPIV * 2;
+            JPIVSTART = JPIVSTART - NTOPIV;
+         END DO
+
+!        Permute U block to match L
+         CALL SLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+!        Factor the current column
+         IF( A( J, J ).NE.ZERO .AND. .NOT.SISNAN( A( J, J ) ) ) THEN
+               IF( ABS(A( J, J )) .GE. SFMIN ) THEN
+                  CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+               ELSE
+                 DO I = 1, M-J
+                    A( J+I, J ) = A( J+I, J ) / A( J, J )
+                 END DO
+               END IF
+         ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+            INFO = J
+         END IF
+
+!        Solve for U block.
+         CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+     $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA )
+!        Schur complement.
+         CALL SGEMM( 'No transpose', 'No transpose', M-J,
+     $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+      END DO
+
+!     Handle pivot permutations on the way out of the recursion
+      NPIVED = IAND( NSTEP, -NSTEP )
+      J = NSTEP - NPIVED
+      DO WHILE ( J .GT. 0 )
+         NTOPIV = IAND( J, -J )
+         CALL SLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+     $        IPIV, 1 )
+         J = J - NTOPIV
+      END DO
+
+!     If short and wide, handle the rest of the columns.
+      IF ( M .LT. N ) THEN
+         CALL SLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+         CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+     $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+      END IF
+
+      RETURN
+*
+*     End of SGETRF
+*
+      END
diff --git a/SRC/VARIANTS/lu/REC/zgetrf.f b/SRC/VARIANTS/lu/REC/zgetrf.f
new file mode 100644
index 00000000..e7b75b00
--- /dev/null
+++ b/SRC/VARIANTS/lu/REC/zgetrf.f
@@ -0,0 +1,224 @@
+      SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO )
+      IMPLICIT NONE
+*
+*  -- LAPACK routine (version 3.X) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     May 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This code implements an iterative version of Sivan Toledo's recursive
+*  LU algorithm[1].  For square matrices, this iterative versions should
+*  be within a factor of two of the optimum number of memory transfers.
+*
+*  The pattern is as follows, with the large blocks of U being updated
+*  in one call to DTRSM, and the dotted lines denoting sections that
+*  have had all pending permutations applied:
+*
+*   1 2 3 4 5 6 7 8
+*  +-+-+---+-------+------
+*  | |1|   |       |
+*  |.+-+ 2 |       |
+*  | | |   |       |
+*  |.|.+-+-+   4   |
+*  | | | |1|       |
+*  | | |.+-+       |
+*  | | | | |       |
+*  |.|.|.|.+-+-+---+  8
+*  | | | | | |1|   |
+*  | | | | |.+-+ 2 |
+*  | | | | | | |   |
+*  | | | | |.|.+-+-+
+*  | | | | | | | |1|
+*  | | | | | | |.+-+
+*  | | | | | | | | |
+*  |.|.|.|.|.|.|.|.+-----
+*  | | | | | | | | |
+*
+*  The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
+*  the binary expansion of the current column.  Each Schur update is
+*  applied as soon as the necessary portion of U is available.
+*
+*  [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
+*  Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
+*  1065-1081. http://dx.doi.org/10.1137/S0895479896297744
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, NEGONE
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ONE = (1.0D+0, 0.0D+0) )
+      PARAMETER          ( NEGONE = (-1.0D+0, 0.0D+0) )
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   SFMIN, PIVMAG
+      COMPLEX*16         TMP
+      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
+      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      INTEGER            IZAMAX
+      LOGICAL            DISNAN
+      EXTERNAL           DLAMCH, IZAMAX, DISNAN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZTRSM, ZSCAL, XERBLA, ZLASWP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, IAND, ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Compute machine safe minimum
+*
+      SFMIN = DLAMCH( 'S' )
+*
+      NSTEP = MIN( M, N )
+      DO J = 1, NSTEP
+         KAHEAD = IAND( J, -J )
+         KSTART = J + 1 - KAHEAD
+         KCOLS = MIN( KAHEAD, M-J )
+*
+*        Find pivot.
+*
+         JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+
+!        Permute just this column.
+         IF (JP .NE. J) THEN
+            TMP = A( J, J )
+            A( J, J ) = A( JP, J )
+            A( JP, J ) = TMP
+         END IF
+
+!        Apply pending permutations to L
+         NTOPIV = 1
+         IPIVSTART = J
+         JPIVSTART = J - NTOPIV
+         DO WHILE ( NTOPIV .LT. KAHEAD )
+            CALL ZLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
+     $           IPIV, 1 )
+            IPIVSTART = IPIVSTART - NTOPIV;
+            NTOPIV = NTOPIV * 2;
+            JPIVSTART = JPIVSTART - NTOPIV;
+         END DO
+
+!        Permute U block to match L
+         CALL ZLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
+
+!        Factor the current column
+         PIVMAG = ABS( A( J, J ) )
+         IF( PIVMAG.NE.ZERO .AND. .NOT.DISNAN( PIVMAG ) ) THEN
+               IF( PIVMAG .GE. SFMIN ) THEN
+                  CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+               ELSE
+                 DO I = 1, M-J
+                    A( J+I, J ) = A( J+I, J ) / A( J, J )
+                 END DO
+               END IF
+         ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
+            INFO = J
+         END IF
+
+!        Solve for U block.
+         CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
+     $        KCOLS, ONE, A( KSTART, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA )
+!        Schur complement.
+         CALL ZGEMM( 'No transpose', 'No transpose', M-J,
+     $        KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
+     $        A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
+      END DO
+
+!     Handle pivot permutations on the way out of the recursion
+      NPIVED = IAND( NSTEP, -NSTEP )
+      J = NSTEP - NPIVED
+      DO WHILE ( J .GT. 0 )
+         NTOPIV = IAND( J, -J )
+         CALL ZLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
+     $        IPIV, 1 )
+         J = J - NTOPIV
+      END DO
+
+!     If short and wide, handle the rest of the columns.
+      IF ( M .LT. N ) THEN
+         CALL ZLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
+         CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
+     $        N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
+      END IF
+
+      RETURN
+*
+*     End of ZGETRF
+*
+      END
diff --git a/SRC/VARIANTS/qr/LL/cgeqrf.f b/SRC/VARIANTS/qr/LL/cgeqrf.f
new file mode 100644
index 00000000..413bc90c
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/cgeqrf.f
@@ -0,0 +1,343 @@
+      SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK))
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*
+*          The dimension of the array WORK. The dimension can be divided into three parts.
+*
+*          1) The part for the triangular factor T. If the very last T is not bigger 
+*             than any of the rest, then this part is NB x ceiling(K/NB), otherwise, 
+*             NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T              
+*
+*          2) The part for the very last T when T is bigger than any of the rest T. 
+*             The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+*             where K = min(M,N), NX is calculated by
+*                   NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) )
+*
+*          3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+*          So LWORK = part1 + part2 + part3
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+     $                   NBMIN, NX, LBWORK, NT, LLWORK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQR2, CLARFB, CLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCEIL
+      EXTERNAL           ILAENV, SCEIL
+*     ..
+*     .. Executable Statements ..
+
+      INFO = 0
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      K = MIN( M, N )
+      NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) )
+      END IF
+*
+*     Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+*            NB=3     2NB=6       K=10
+*            |        |           |    
+*      1--2--3--4--5--6--7--8--9--10
+*                  |     \________/
+*               K-NX=5      NT=4
+*
+*     So here 4 x 4 is the last T stored in the workspace
+*
+      NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+*     optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+      LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+      LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+      IF ( NT.GT.NB ) THEN
+
+          LBWORK = K-NT 
+*
+*         Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+          LWKOPT = (LBWORK+LLWORK)*NB
+          WORK( 1 ) = (LWKOPT+NT*NT)
+
+      ELSE
+
+          LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+          LWKOPT = (LBWORK+LLWORK-NB)*NB 
+          WORK( 1 ) = LWKOPT
+
+      END IF
+
+*
+*     Test the input arguments
+*
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IF ( NT.LE.NB ) THEN
+                IWS = (LBWORK+LLWORK-NB)*NB
+            ELSE
+                IWS = (LBWORK+LLWORK)*NB+NT*NT
+            END IF
+
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               IF ( NT.LE.NB ) THEN
+                    NB = LWORK / (LLWORK+(LBWORK-NB))
+               ELSE
+                    NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+               END IF
+
+               NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Update the current column using old T's
+*
+            DO 20 J = 1, I - NB, NB
+*
+*              Apply H' to A(J:M,I:I+IB-1) from the left
+*
+               CALL CLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-J+1, IB, NB,
+     $                      A( J, J ), LDA, WORK(J), LBWORK, 
+     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                      IB)
+
+20          CONTINUE   
+*
+*           Compute the QR factorization of the current block
+*           A(I:M,I:I+IB-1)
+*
+            CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), 
+     $                        WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), 
+     $                      WORK(I), LBWORK )
+*
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K ) THEN
+         
+         IF ( I .NE. 1 )   THEN
+
+             DO 30 J = 1, I - NB, NB
+*
+*                Apply H' to A(J:M,I:K) from the left
+*
+                 CALL CLARFB( 'Left', 'Transpose', 'Forward',
+     $                       'Columnwise', M-J+1, K-I+1, NB,
+     $                       A( J, J ), LDA, WORK(J), LBWORK, 
+     $                       A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                       K-I+1)
+30           CONTINUE   
+
+             CALL CGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), 
+     $                   WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+         ELSE
+*
+*        Use unblocked code to factor the last or only block.
+*
+         CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), 
+     $               WORK,IINFO )
+
+         END IF
+      END IF
+
+
+*
+*     Apply update to the column M+1:N when N > M
+*
+      IF ( M.LT.N .AND. I.NE.1) THEN
+*
+*         Form the last triangular factor of the block reflector
+*         H = H(i) H(i+1) . . . H(i+ib-1)
+*
+          IF ( NT .LE. NB ) THEN
+               CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+          ELSE
+               CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), 
+     $                     WORK(LBWORK*NB+1), NT )
+          END IF
+
+*
+*         Apply H' to A(1:M,M+1:N) from the left
+*
+          DO 40 J = 1, K-NX, NB
+
+               IB = MIN( K-J+1, NB )
+
+               CALL CLARFB( 'Left', 'Transpose', 'Forward',
+     $                     'Columnwise', M-J+1, N-M, IB,
+     $                     A( J, J ), LDA, WORK(J), LBWORK, 
+     $                     A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                     N-M)
+
+40       CONTINUE   
+         
+         IF ( NT.LE.NB ) THEN
+             CALL CLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, WORK(J), LBWORK, 
+     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         ELSE 
+             CALL CLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, 
+     $                   WORK(LBWORK*NB+1), 
+     $                   NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         END IF
+          
+      END IF
+
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGEQRF
+*
+      END
diff --git a/SRC/VARIANTS/qr/LL/dgeqrf.f b/SRC/VARIANTS/qr/LL/dgeqrf.f
new file mode 100644
index 00000000..728ea1b8
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/dgeqrf.f
@@ -0,0 +1,344 @@
+      SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*
+*          The dimension of the array WORK. The dimension can be divided into three parts.
+*
+*          1) The part for the triangular factor T. If the very last T is not bigger 
+*             than any of the rest, then this part is NB x ceiling(K/NB), otherwise, 
+*             NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T              
+*
+*          2) The part for the very last T when T is bigger than any of the rest T. 
+*             The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+*             where K = min(M,N), NX is calculated by
+*                   NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
+*
+*          3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+*          So LWORK = part1 + part2 + part3
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+     $                   NBMIN, NX, LBWORK, NT, LLWORK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQR2, DLARFB, DLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCEIL
+      EXTERNAL           ILAENV, SCEIL
+*     ..
+*     .. Executable Statements ..
+
+      INFO = 0
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      K = MIN( M, N )
+      NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
+      END IF
+*
+*     Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+*            NB=3     2NB=6       K=10
+*            |        |           |    
+*      1--2--3--4--5--6--7--8--9--10
+*                  |     \________/
+*               K-NX=5      NT=4
+*
+*     So here 4 x 4 is the last T stored in the workspace
+*
+      NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+*     optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+      LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+      LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+      IF ( NT.GT.NB ) THEN
+
+          LBWORK = K-NT 
+*
+*         Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+          LWKOPT = (LBWORK+LLWORK)*NB
+          WORK( 1 ) = (LWKOPT+NT*NT)
+
+      ELSE
+
+          LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+          LWKOPT = (LBWORK+LLWORK-NB)*NB 
+          WORK( 1 ) = LWKOPT
+
+      END IF
+
+*
+*     Test the input arguments
+*
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IF ( NT.LE.NB ) THEN
+                IWS = (LBWORK+LLWORK-NB)*NB
+            ELSE
+                IWS = (LBWORK+LLWORK)*NB+NT*NT
+            END IF
+
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               IF ( NT.LE.NB ) THEN
+                    NB = LWORK / (LLWORK+(LBWORK-NB))
+               ELSE
+                    NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+               END IF
+
+               NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Update the current column using old T's
+*
+            DO 20 J = 1, I - NB, NB
+*
+*              Apply H' to A(J:M,I:I+IB-1) from the left
+*
+               CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-J+1, IB, NB,
+     $                      A( J, J ), LDA, WORK(J), LBWORK, 
+     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                      IB)
+
+20          CONTINUE   
+*
+*           Compute the QR factorization of the current block
+*           A(I:M,I:I+IB-1)
+*
+            CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), 
+     $                        WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), 
+     $                      WORK(I), LBWORK )
+*
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K ) THEN
+         
+         IF ( I .NE. 1 )   THEN
+
+             DO 30 J = 1, I - NB, NB
+*
+*                Apply H' to A(J:M,I:K) from the left
+*
+                 CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                       'Columnwise', M-J+1, K-I+1, NB,
+     $                       A( J, J ), LDA, WORK(J), LBWORK, 
+     $                       A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                       K-I+1)
+30           CONTINUE   
+
+             CALL DGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), 
+     $                   WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+         ELSE
+*
+*        Use unblocked code to factor the last or only block.
+*
+         CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), 
+     $               WORK,IINFO )
+
+         END IF
+      END IF
+
+
+*
+*     Apply update to the column M+1:N when N > M
+*
+      IF ( M.LT.N .AND. I.NE.1) THEN
+*
+*         Form the last triangular factor of the block reflector
+*         H = H(i) H(i+1) . . . H(i+ib-1)
+*
+          IF ( NT .LE. NB ) THEN
+               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+          ELSE
+               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), 
+     $                     WORK(LBWORK*NB+1), NT )
+          END IF
+
+*
+*         Apply H' to A(1:M,M+1:N) from the left
+*
+          DO 40 J = 1, K-NX, NB
+
+               IB = MIN( K-J+1, NB )
+
+               CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                     'Columnwise', M-J+1, N-M, IB,
+     $                     A( J, J ), LDA, WORK(J), LBWORK, 
+     $                     A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                     N-M)
+
+40       CONTINUE   
+         
+         IF ( NT.LE.NB ) THEN
+             CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, WORK(J), LBWORK, 
+     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         ELSE 
+             CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, 
+     $                   WORK(LBWORK*NB+1), 
+     $                   NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         END IF
+          
+      END IF
+
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGEQRF
+*
+      END
+
diff --git a/SRC/VARIANTS/qr/LL/sceil.f b/SRC/VARIANTS/qr/LL/sceil.f
new file mode 100644
index 00000000..70c5c5cf
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/sceil.f
@@ -0,0 +1,28 @@
+      REAL FUNCTION SCEIL( A )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     June 2008
+*
+*     .. Scalar Arguments ..*
+      REAL A
+*     ..
+*
+*  =====================================================================
+*
+*     .. Intrinsic Functions ..
+	      INTRINSIC          INT
+*     ..
+*     .. Executable Statements ..*
+*      
+      IF (A-INT(A).EQ.0) THEN
+          SCEIL = A
+      ELSE IF (A.GT.0) THEN
+          SCEIL = INT(A)+1;
+      ELSE
+          SCEIL = INT(A)
+      END IF
+
+      RETURN
+*
+      END
diff --git a/SRC/VARIANTS/qr/LL/sgeqrf.f b/SRC/VARIANTS/qr/LL/sgeqrf.f
new file mode 100644
index 00000000..fef6379b
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/sgeqrf.f
@@ -0,0 +1,343 @@
+      SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*
+*          The dimension of the array WORK. The dimension can be divided into three parts.
+*
+*          1) The part for the triangular factor T. If the very last T is not bigger 
+*             than any of the rest, then this part is NB x ceiling(K/NB), otherwise, 
+*             NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T              
+*
+*          2) The part for the very last T when T is bigger than any of the rest T. 
+*             The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+*             where K = min(M,N), NX is calculated by
+*                   NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
+*
+*          3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+*          So LWORK = part1 + part2 + part3
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+     $                   NBMIN, NX, LBWORK, NT, LLWORK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQR2, SLARFB, SLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCEIL
+      EXTERNAL           ILAENV, SCEIL
+*     ..
+*     .. Executable Statements ..
+
+      INFO = 0
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      K = MIN( M, N )
+      NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
+      END IF
+*
+*     Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+*            NB=3     2NB=6       K=10
+*            |        |           |    
+*      1--2--3--4--5--6--7--8--9--10
+*                  |     \________/
+*               K-NX=5      NT=4
+*
+*     So here 4 x 4 is the last T stored in the workspace
+*
+      NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+*     optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+      LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+      LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+      IF ( NT.GT.NB ) THEN
+
+          LBWORK = K-NT 
+*
+*         Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+          LWKOPT = (LBWORK+LLWORK)*NB
+          WORK( 1 ) = (LWKOPT+NT*NT)
+
+      ELSE
+
+          LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+          LWKOPT = (LBWORK+LLWORK-NB)*NB 
+          WORK( 1 ) = LWKOPT
+
+      END IF
+
+*
+*     Test the input arguments
+*
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IF ( NT.LE.NB ) THEN
+                IWS = (LBWORK+LLWORK-NB)*NB
+            ELSE
+                IWS = (LBWORK+LLWORK)*NB+NT*NT
+            END IF
+
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               IF ( NT.LE.NB ) THEN
+                    NB = LWORK / (LLWORK+(LBWORK-NB))
+               ELSE
+                    NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+               END IF
+
+               NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Update the current column using old T's
+*
+            DO 20 J = 1, I - NB, NB
+*
+*              Apply H' to A(J:M,I:I+IB-1) from the left
+*
+               CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-J+1, IB, NB,
+     $                      A( J, J ), LDA, WORK(J), LBWORK, 
+     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                      IB)
+
+20          CONTINUE   
+*
+*           Compute the QR factorization of the current block
+*           A(I:M,I:I+IB-1)
+*
+            CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), 
+     $                        WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), 
+     $                      WORK(I), LBWORK )
+*
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K ) THEN
+         
+         IF ( I .NE. 1 )   THEN
+
+             DO 30 J = 1, I - NB, NB
+*
+*                Apply H' to A(J:M,I:K) from the left
+*
+                 CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                       'Columnwise', M-J+1, K-I+1, NB,
+     $                       A( J, J ), LDA, WORK(J), LBWORK, 
+     $                       A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                       K-I+1)
+30           CONTINUE   
+
+             CALL SGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), 
+     $                   WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+         ELSE
+*
+*        Use unblocked code to factor the last or only block.
+*
+         CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), 
+     $               WORK,IINFO )
+
+         END IF
+      END IF
+
+
+*
+*     Apply update to the column M+1:N when N > M
+*
+      IF ( M.LT.N .AND. I.NE.1) THEN
+*
+*         Form the last triangular factor of the block reflector
+*         H = H(i) H(i+1) . . . H(i+ib-1)
+*
+          IF ( NT .LE. NB ) THEN
+               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+          ELSE
+               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), 
+     $                     WORK(LBWORK*NB+1), NT )
+          END IF
+
+*
+*         Apply H' to A(1:M,M+1:N) from the left
+*
+          DO 40 J = 1, K-NX, NB
+
+               IB = MIN( K-J+1, NB )
+
+               CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                     'Columnwise', M-J+1, N-M, IB,
+     $                     A( J, J ), LDA, WORK(J), LBWORK, 
+     $                     A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                     N-M)
+
+40       CONTINUE   
+         
+         IF ( NT.LE.NB ) THEN
+             CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, WORK(J), LBWORK, 
+     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         ELSE 
+             CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, 
+     $                   WORK(LBWORK*NB+1), 
+     $                   NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         END IF
+          
+      END IF
+
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGEQRF
+*
+      END
diff --git a/SRC/VARIANTS/qr/LL/zgeqrf.f b/SRC/VARIANTS/qr/LL/zgeqrf.f
new file mode 100644
index 00000000..cf5e093e
--- /dev/null
+++ b/SRC/VARIANTS/qr/LL/zgeqrf.f
@@ -0,0 +1,343 @@
+      SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     March 2008
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  This is the left-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*
+*          The dimension of the array WORK. The dimension can be divided into three parts.
+*
+*          1) The part for the triangular factor T. If the very last T is not bigger 
+*             than any of the rest, then this part is NB x ceiling(K/NB), otherwise, 
+*             NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T              
+*
+*          2) The part for the very last T when T is bigger than any of the rest T. 
+*             The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
+*             where K = min(M,N), NX is calculated by
+*                   NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
+*
+*          3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
+*
+*          So LWORK = part1 + part2 + part3
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
+     $                   NBMIN, NX, LBWORK, NT, LLWORK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEQR2, ZLARFB, ZLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCEIL
+      EXTERNAL           ILAENV, SCEIL
+*     ..
+*     .. Executable Statements ..
+
+      INFO = 0
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      K = MIN( M, N )
+      NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
+      END IF
+*
+*     Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.:
+*
+*            NB=3     2NB=6       K=10
+*            |        |           |    
+*      1--2--3--4--5--6--7--8--9--10
+*                  |     \________/
+*               K-NX=5      NT=4
+*
+*     So here 4 x 4 is the last T stored in the workspace
+*
+      NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
+
+*
+*     optimal workspace = space for dlarfb + space for normal T's + space for the last T
+*
+      LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
+      LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
+
+      IF ( NT.GT.NB ) THEN
+
+          LBWORK = K-NT 
+*
+*         Optimal workspace for dlarfb = MAX(1,N)*NT
+*
+          LWKOPT = (LBWORK+LLWORK)*NB
+          WORK( 1 ) = (LWKOPT+NT*NT)
+
+      ELSE
+
+          LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
+          LWKOPT = (LBWORK+LLWORK-NB)*NB 
+          WORK( 1 ) = LWKOPT
+
+      END IF
+
+*
+*     Test the input arguments
+*
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IF ( NT.LE.NB ) THEN
+                IWS = (LBWORK+LLWORK-NB)*NB
+            ELSE
+                IWS = (LBWORK+LLWORK)*NB+NT*NT
+            END IF
+
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               IF ( NT.LE.NB ) THEN
+                    NB = LWORK / (LLWORK+(LBWORK-NB))
+               ELSE
+                    NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
+               END IF
+
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Update the current column using old T's
+*
+            DO 20 J = 1, I - NB, NB
+*
+*              Apply H' to A(J:M,I:I+IB-1) from the left
+*
+               CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-J+1, IB, NB,
+     $                      A( J, J ), LDA, WORK(J), LBWORK, 
+     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                      IB)
+
+20          CONTINUE   
+*
+*           Compute the QR factorization of the current block
+*           A(I:M,I:I+IB-1)
+*
+            CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), 
+     $                        WORK(LBWORK*NB+NT*NT+1), IINFO )
+
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), 
+     $                      WORK(I), LBWORK )
+*
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K ) THEN
+         
+         IF ( I .NE. 1 )   THEN
+
+             DO 30 J = 1, I - NB, NB
+*
+*                Apply H' to A(J:M,I:K) from the left
+*
+                 CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+     $                       'Columnwise', M-J+1, K-I+1, NB,
+     $                       A( J, J ), LDA, WORK(J), LBWORK, 
+     $                       A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                       K-I+1)
+30           CONTINUE   
+
+             CALL ZGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), 
+     $                   WORK(LBWORK*NB+NT*NT+1),IINFO )
+
+         ELSE
+*
+*        Use unblocked code to factor the last or only block.
+*
+         CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), 
+     $               WORK,IINFO )
+
+         END IF
+      END IF
+
+
+*
+*     Apply update to the column M+1:N when N > M
+*
+      IF ( M.LT.N .AND. I.NE.1) THEN
+*
+*         Form the last triangular factor of the block reflector
+*         H = H(i) H(i+1) . . . H(i+ib-1)
+*
+          IF ( NT .LE. NB ) THEN
+               CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
+          ELSE
+               CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
+     $                     A( I, I ), LDA, TAU( I ), 
+     $                     WORK(LBWORK*NB+1), NT )
+          END IF
+
+*
+*         Apply H' to A(1:M,M+1:N) from the left
+*
+          DO 40 J = 1, K-NX, NB
+
+               IB = MIN( K-J+1, NB )
+
+               CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+     $                     'Columnwise', M-J+1, N-M, IB,
+     $                     A( J, J ), LDA, WORK(J), LBWORK, 
+     $                     A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                     N-M)
+
+40       CONTINUE   
+         
+         IF ( NT.LE.NB ) THEN
+             CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, WORK(J), LBWORK, 
+     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         ELSE 
+             CALL ZLARFB( 'Left', 'Transpose', 'Forward',
+     $                   'Columnwise', M-J+1, N-M, K-J+1,
+     $                   A( J, J ), LDA, 
+     $                   WORK(LBWORK*NB+1), 
+     $                   NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
+     $                   N-M)
+         END IF
+          
+      END IF
+
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGEQRF
+*
+      END
author	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
committer	jason <jason@8a072113-8704-0410-8d35-dd094bca7971>	2008-10-28 01:38:50 +0000
commit	baba851215b44ac3b60b9248eb02bcce7eb76247 (patch)
tree	8c0f5c006875532a30d4409f5e94b0f310ff00a7 /SRC/VARIANTS