diff options
author | julielangou <julie@cs.utk.edu> | 2016-11-19 14:40:33 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-19 14:40:33 -0800 |
commit | 01cdfedf1d726a003f7a4e7331f32a7e434f1707 (patch) | |
tree | 4a9ff4318f5a1d66a4dc2690d9dc5ccb242981a3 | |
parent | ead2c73f1a6dad1342bf32987c0b2f2eaf61f18a (diff) | |
parent | ae36f785d68d637ef0a60f9028c38eb2d369d9f1 (diff) |
Merge pull request #83 from haidarazzam/master
adding the 2stage symmetric eigenvalue routines drivers checking
83 files changed, 53949 insertions, 84 deletions
diff --git a/SRC/Makefile b/SRC/Makefile index 01cf7021..c521d7f6 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -56,7 +56,7 @@ include ../make.inc # ####################################################################### -ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \ +ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o iparam2stage.o \ ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o @@ -120,7 +120,7 @@ SLASRC = \ slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \ slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ - slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slargv.o \ + slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ slarrv.o slartv.o \ slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ slasyf_rk.o \ @@ -167,7 +167,10 @@ SLASRC = \ sgelqt.o sgelqt3.o sgemlqt.o \ sgetsls.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \ sgelq.o slaswlq.o slamswlq.o sgemlq.o \ - stplqt.o stplqt2.o stpmlqt.o + stplqt.o stplqt2.o stpmlqt.o \ + ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ + ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ + ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o @@ -224,7 +227,7 @@ CLASRC = \ claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \ claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \ - clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ + clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ claswp.o clasyf.o clasyf_rook.o clasyf_rk.o \ clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \ @@ -263,7 +266,10 @@ CLASRC = \ cgelqt.o cgelqt3.o cgemlqt.o \ cgetsls.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \ cgelq.o claswlq.o clamswlq.o cgemlq.o \ - ctplqt.o ctplqt2.o ctpmlqt.o + ctplqt.o ctplqt2.o ctpmlqt.o \ + chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ + cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ + chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o ifdef USEXBLAS CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ @@ -306,7 +312,7 @@ DLASRC = \ dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ - dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \ + dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ dlargv.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ @@ -355,7 +361,10 @@ DLASRC = \ dgelqt.o dgelqt3.o dgemlqt.o \ dgetsls.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \ dgelq.o dlaswlq.o dlamswlq.o dgemlq.o \ - dtplqt.o dtplqt2.o dtpmlqt.o + dtplqt.o dtplqt2.o dtpmlqt.o \ + dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ + dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ + dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o ifdef USEXBLAS DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ @@ -413,7 +422,7 @@ ZLASRC = \ zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ zlarcm.o zlarf.o zlarfb.o \ zlarfg.o zlarft.o zlarfgp.o \ - zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ + zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o \ zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlauu2.o \ @@ -455,7 +464,10 @@ ZLASRC = \ zgelqt.o zgelqt3.o zgemlqt.o \ zgetsls.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \ zgelq.o zlaswlq.o zlamswlq.o zgemlq.o \ - ztplqt.o ztplqt2.o ztpmlqt.o + ztplqt.o ztplqt2.o ztpmlqt.o \ + zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ + zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ + zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o ifdef USEXBLAS ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ diff --git a/SRC/chb2st_kernels.f b/SRC/chb2st_kernels.f new file mode 100644 index 00000000..8b0a4b28 --- /dev/null +++ b/SRC/chb2st_kernels.f @@ -0,0 +1,320 @@ +*> \brief \b CHB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> c, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHB2ST_KERNELS + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chb2st_kernels.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chb2st_kernels.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chb2st_kernels.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> @param[in] n +*> The order of the matrix A. +*> +*> @param[in] nb +*> The size of the band. +*> +*> @param[in, out] A +*> A pointer to the matrix A. +*> +*> @param[in] lda +*> The leading dimension of the matrix A. +*> +*> @param[out] V +*> COMPLEX array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> +*> @param[out] TAU +*> COMPLEX array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> +*> @param[in] st +*> internal parameter for indices. +*> +*> @param[in] ed +*> internal parameter for indices. +*> +*> @param[in] sweep +*> internal parameter for indices. +*> +*> @param[in] Vblksiz +*> internal parameter for indices. +*> +*> @param[in] wantz +*> logical which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> +*> @param[in] work +*> Workspace of size nb. +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + COMPLEX CTMP +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CLARFX, CLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 101, 102, 103 ) TTYPE +* + 101 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = CONJG( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = CONJG( A( OFDPOS, ST ) ) + CALL CLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + 103 CONTINUE + LM = ED - ST + 1 + CALL CLARFY( UPLO, LM, V( VPOS ), 1, CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + GOTO 300 +* + 102 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL CLARFX( 'Left', LN, LM, V( VPOS ), + $ CONJG( TAU( TAUPOS ) ), A( DPOS-NB, J1 ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = CONJG( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = CONJG( A( DPOS-NB, J1 ) ) + CALL CLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL CLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + GOTO 300 +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 201, 202, 203 ) TTYPE +* + 201 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL CLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + 203 CONTINUE + LM = ED - ST + 1 +* + CALL CLARFY( UPLO, LM, V( VPOS ), 1, CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + GOTO 300 +* + 202 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL CLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL CLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL CLARFX( 'Left', LM, LN-1, V( VPOS ), + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + GOTO 300 + ENDIF + + 300 CONTINUE + RETURN +* +* END OF CHB2ST_KERNELS +* + END diff --git a/SRC/chbev_2stage.f b/SRC/chbev_2stage.f new file mode 100644 index 00000000..182d3d93 --- /dev/null +++ b/SRC/chbev_2stage.f @@ -0,0 +1,386 @@ +*> \brief <b> CHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from zhbev_2stage.f, fortran z -> c, Sat Nov 5 23:18:20 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, INDRWK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR + $ CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = REAL( AB( 1, 1 ) ) + ELSE + W( 1 ) = REAL( AB( KD+1, 1 ) ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHBEV_2STAGE +* + END diff --git a/SRC/chbevd_2stage.f b/SRC/chbevd_2stage.f new file mode 100644 index 00000000..89c118d3 --- /dev/null +++ b/SRC/chbevd_2stage.f @@ -0,0 +1,458 @@ +*> \brief <b> CHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from zhbevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:17 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE, + $ LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CGEMM, CLACPY, + $ CLASCL, CSTEDC, CHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = MAX( N, LHTRD + LWTRD ) + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( AB( 1, 1 ) ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDHOUS = 1 + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + INDWK2 = INDWK + N*N + LLWK2 = LWORK - INDWK2 + 1 +* + CALL CHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBEVD_2STAGE +* + END diff --git a/SRC/chbevx_2stage.f b/SRC/chbevx_2stage.f new file mode 100644 index 00000000..07eb6153 --- /dev/null +++ b/SRC/chbevx_2stage.f @@ -0,0 +1,646 @@ +*> \brief <b> CHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from zhbevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:22 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, +* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, +* Z, LDZ, WORK, LWORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, + $ Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, WORK, LWORK, RWORK, IWORK, + $ IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CCOPY, + $ CGEMV, CLACPY, CLASCL, CSTEIN, CSTEQR, + $ CSWAP, CHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = REAL( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = REAL( CTMP1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N +* + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_HB2ST( 'N', JOBZ, UPLO, N, KD, AB, LDAB, + $ RWORK( INDD ), RWORK( INDE ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHBEVX_2STAGE +* + END diff --git a/SRC/cheev_2stage.f b/SRC/cheev_2stage.f new file mode 100644 index 00000000..b98dac76 --- /dev/null +++ b/SRC/cheev_2stage.f @@ -0,0 +1,355 @@ +*> \brief <b> CHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @generated from zheev_2stage.f, fortran z -> c, Sat Nov 5 23:18:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, + $ CUNGTR, CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( A( 1, 1 ) ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUNGTR to generate the unitary matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEEV_2STAGE +* + END diff --git a/SRC/cheevd_2stage.f b/SRC/cheevd_2stage.f new file mode 100644 index 00000000..9d1057fc --- /dev/null +++ b/SRC/cheevd_2stage.f @@ -0,0 +1,451 @@ +*> \brief <b> CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @generated from zheevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:14 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LRWMIN, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + + + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLACPY, CLASCL, + $ CSTEDC, CUNMTR, CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LHTRD + LWTRD + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( A( 1, 1 ) ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call CUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL CUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL CLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVD_2STAGE +* + END diff --git a/SRC/cheevr_2stage.f b/SRC/cheevr_2stage.f new file mode 100644 index 00000000..23a98389 --- /dev/null +++ b/SRC/cheevr_2stage.f @@ -0,0 +1,779 @@ +*> \brief <b> CHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @generated from zheevr_2stage.f, fortran z -> c, Sat Nov 5 23:18:11 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVR_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevr_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevr_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevr_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> CHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to CHETRD. Then, whenever possible, CHEEVR_2STAGE calls CSTEMR to compute +*> eigenspectrum using Relatively Robust Representations. CSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : CHEEVR_2STAGE calls CSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> CHEEVR_2STAGE calls SSTEBZ and CSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of CSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> CSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of CSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by CUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWMIN, NSPLIT, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, CLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, + $ CHETRD_2STAGE, CSTEMR, CSTEIN, CSWAP, CUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'CHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or CSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in CHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDHOUS = INDTAU + N + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from CHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by CSTEMR (the SSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and CSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* CSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDRD ), + $ RWORK( INDRE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or CSTEMR and CUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL CSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* Also call SSTEBZ and CSTEIN if CSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVR_2STAGE +* + END diff --git a/SRC/cheevx_2stage.f b/SRC/cheevx_2stage.f new file mode 100644 index 00000000..84ae438d --- /dev/null +++ b/SRC/cheevx_2stage.f @@ -0,0 +1,618 @@ +*> \brief <b> CHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @generated from zheevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:09 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cheevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cheevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cheevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, + $ CLACPY, CSTEIN, CSTEQR, CSWAP, CUNGTR, CUNMTR, + $ CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDD ), + $ RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, WORK( INDWRK ), + $ LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL CUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEEVX_2STAGE +* + END diff --git a/SRC/chegv_2stage.f b/SRC/chegv_2stage.f new file mode 100644 index 00000000..71d58d74 --- /dev/null +++ b/SRC/chegv_2stage.f @@ -0,0 +1,379 @@ +*> \brief \b CHEGV_2STAGE +* +* @generated from zhegv_2stage.f, fortran z -> c, Sun Nov 6 13:09:52 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chegv_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chegv_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chegv_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +* sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPOTRF or CHEEV returned an error code: +*> <= N: if INFO = i, CHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHEGST, CPOTRF, CTRMM, CTRSM, + $ CHEEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, + $ WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEGV_2STAGE +* + END diff --git a/SRC/chetrd_2stage.f b/SRC/chetrd_2stage.f new file mode 100644 index 00000000..795462c6 --- /dev/null +++ b/SRC/chetrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b CHETRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> c, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q1**H Q2**H* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the unitary +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the unitary matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is COMPLEX array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRD_HE2HB, CHETRD_HB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV( 17, 'CHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV( 18, 'CHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'CHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL CHETRD_HE2HB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HE2HB', -INFO ) + RETURN + END IF + CALL CHETRD_HB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_2STAGE +* + END diff --git a/SRC/chetrd_hb2st.F b/SRC/chetrd_hb2st.F new file mode 100644 index 00000000..6f253278 --- /dev/null +++ b/SRC/chetrd_hb2st.F @@ -0,0 +1,603 @@ +*> \brief \b CHBTRD +* +* @generated from zhetrd_hb2st.F, fortran z -> c, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBTRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chbtrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chbtrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chbtrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #define PRECISION_COMPLEX +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBTRD reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the chetrd_he2hb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the chetrd_he2hb +*> routine has been called to produce AB (e.g., AB is +*> the output of chetrd_he2hb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is COMPLEX array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#define PRECISION_COMPLEX +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RZERO + COMPLEX ZERO, ONE + PARAMETER ( RZERO = 0.0E+0, + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SICEV, SIZETAU, LDV, LHMIN, LWMIN +#if defined (PRECISION_COMPLEX) + REAL ABSTMP + COMPLEX TMP +#endif +* .. +* .. External Subroutines .. + EXTERNAL CHB2ST_KERNELS, CLACPY, CLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'CHETRD_HB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SICEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* complex because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = REAL( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE + GOTO 200 + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* +C IF( KD.EQ.1 .AND. N.GT.(KD+1) .AND. AFTERS1 ) THEN + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = REAL( AB( ABDPOS, I ) ) + 50 CONTINUE +#if defined (PRECISION_COMPLEX) +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL CSCAL( N, CONJG( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL CSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +#else + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = REAL( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = REAL( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +#endif + GOTO 200 + END IF +* +* Main code start here. +* Reduce the hermitian band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL CLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL CLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) GOTO 100 + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + GOTO 130 + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = REAL( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = REAL( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = REAL( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + 200 CONTINUE +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_HB2ST +* + END +#undef PRECISION_COMPLEX + diff --git a/SRC/chetrd_he2hb.f b/SRC/chetrd_he2hb.f new file mode 100644 index 00000000..28f5dc60 --- /dev/null +++ b/SRC/chetrd_he2hb.f @@ -0,0 +1,517 @@ +*> \brief \b CHETRD_HE2HB +* +* @generated from zhetrd_he2hb.f, fortran z -> c, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRD_HE2HB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian +*> band-diagonal form AB by a unitary similarity transformation: +*> Q**H * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK. +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +* A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RONE + COMPLEX ZERO, ONE, HALF + PARAMETER ( RONE = 1.0E+0, + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, + $ CLARFT, CGELQF, CGEQRF, CLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'CHETRD_HE2HB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HE2HB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL CCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL CGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL CLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL CLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL CHEMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL CHER2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL CGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL CLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL CLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL CHEMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_HE2HB +* + END diff --git a/SRC/clarfy.f b/SRC/clarfy.f new file mode 100644 index 00000000..572a4723 --- /dev/null +++ b/SRC/clarfy.f @@ -0,0 +1,163 @@ +*> \brief \b CLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n Hermitian matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHEMV, CHER2 +* .. +* .. External Functions .. + COMPLEX CDOTC + EXTERNAL CDOTC +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL CHEMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*CDOTC( N, WORK, 1, V, INCV ) + CALL CAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL CHER2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of CLARFY +* + END diff --git a/SRC/dlarfy.f b/SRC/dlarfy.f new file mode 100644 index 00000000..089aa94e --- /dev/null +++ b/SRC/dlarfy.f @@ -0,0 +1,161 @@ +*> \brief \b DLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n symmetric matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0, HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSYMV, DSYR2 +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT + EXTERNAL DDOT +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL DSYMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*DDOT( N, WORK, 1, V, INCV ) + CALL DAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL DSYR2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of DLARFY +* + END diff --git a/SRC/dsb2st_kernels.f b/SRC/dsb2st_kernels.f new file mode 100644 index 00000000..15d1186e --- /dev/null +++ b/SRC/dsb2st_kernels.f @@ -0,0 +1,320 @@ +*> \brief \b DSB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> d, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSB2ST_KERNELS + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> @param[in] n +*> The order of the matrix A. +*> +*> @param[in] nb +*> The size of the band. +*> +*> @param[in, out] A +*> A pointer to the matrix A. +*> +*> @param[in] lda +*> The leading dimension of the matrix A. +*> +*> @param[out] V +*> DOUBLE PRECISION array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> +*> @param[out] TAU +*> DOUBLE PRECISION array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> +*> @param[in] st +*> internal parameter for indices. +*> +*> @param[in] ed +*> internal parameter for indices. +*> +*> @param[in] sweep +*> internal parameter for indices. +*> +*> @param[in] Vblksiz +*> internal parameter for indices. +*> +*> @param[in] wantz +*> logical which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> +*> @param[in] work +*> Workspace of size nb. +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + DOUBLE PRECISION CTMP +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DLARFX, DLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 101, 102, 103 ) TTYPE +* + 101 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = ( A( OFDPOS, ST ) ) + CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + 103 CONTINUE + LM = ED - ST + 1 + CALL DLARFY( UPLO, LM, V( VPOS ), 1, ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + GOTO 300 +* + 102 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL DLARFX( 'Left', LN, LM, V( VPOS ), + $ ( TAU( TAUPOS ) ), A( DPOS-NB, J1 ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = ( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = ( A( DPOS-NB, J1 ) ) + CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL DLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + GOTO 300 +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 201, 202, 203 ) TTYPE +* + 201 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + 203 CONTINUE + LM = ED - ST + 1 +* + CALL DLARFY( UPLO, LM, V( VPOS ), 1, ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + GOTO 300 +* + 202 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL DLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL DLARFX( 'Left', LM, LN-1, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + GOTO 300 + ENDIF + + 300 CONTINUE + RETURN +* +* END OF DSB2ST_KERNELS +* + END diff --git a/SRC/dsbev_2stage.f b/SRC/dsbev_2stage.f new file mode 100644 index 00000000..771d29e0 --- /dev/null +++ b/SRC/dsbev_2stage.f @@ -0,0 +1,377 @@ +*> \brief <b> DSBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsbev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsbev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsbev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DSCAL, DSTEQR, DSTERF, XERBLA + $ DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSBEV_2STAGE +* + END diff --git a/SRC/dsbevd_2stage.f b/SRC/dsbevd_2stage.f new file mode 100644 index 00000000..39074681 --- /dev/null +++ b/SRC/dsbevd_2stage.f @@ -0,0 +1,412 @@ +*> \brief <b> DSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsbevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsbevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsbevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ LLWRK2 + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLASCL, DSCAL, DSTEDC, + $ DSTERF, XERBLA, DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = MAX( 2*N, N+LHTRD+LWTRD ) + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce band symmetric matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSBEVD_2STAGE +* + END diff --git a/SRC/dsbevx_2stage.f b/SRC/dsbevx_2stage.f new file mode 100644 index 00000000..3cb3f661 --- /dev/null +++ b/SRC/dsbevx_2stage.f @@ -0,0 +1,633 @@ +*> \brief <b> DSBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsbevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsbevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsbevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, +* LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 7*N, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + 2*N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, + $ LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DSCAL, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA, + $ DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVX_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSBEVX_2STAGE +* + END diff --git a/SRC/dsyev_2stage.f b/SRC/dsyev_2stage.f new file mode 100644 index 00000000..a42e86d8 --- /dev/null +++ b/SRC/dsyev_2stage.f @@ -0,0 +1,348 @@ +*> \brief <b> DSYEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, + $ XERBLA, DSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DORGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSYEV_2STAGE +* + END diff --git a/SRC/dsyevd_2stage.f b/SRC/dsyevd_2stage.f new file mode 100644 index 00000000..161f0e9e --- /dev/null +++ b/SRC/dsyevd_2stage.f @@ -0,0 +1,406 @@ +*> \brief <b> DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLWORK, LLWRK2, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, + $ DSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + LHTRD + LWTRD + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVD_2STAGE +* + END diff --git a/SRC/dsyevr_2stage.f b/SRC/dsyevr_2stage.f new file mode 100644 index 00000000..c1b468dc --- /dev/null +++ b/SRC/dsyevr_2stage.f @@ -0,0 +1,740 @@ +*> \brief <b> DSYEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVR_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevr_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevr_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevr_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> DSYEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to DSYTRD. Then, whenever possible, DSYEVR_2STAGE calls DSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. DSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : DSYEVR_2STAGE calls DSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> DSYEVR_2STAGE calls DSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of DSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> DSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of DSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by DORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 5*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 5*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ, + $ TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWMIN, NSPLIT, + $ LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEMR, DSTEIN, + $ DSTERF, DSWAP, DSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN +* NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) +* NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) ) +* LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or DSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in DSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from DSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by DSTEMR (the DSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and DSTEMR. + INDEE = INDDD + N +* INDHOUS is the starting offset Householder storage of stage 2 + INDHOUS = INDEE + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* DSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or DSTEMR and DORMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL DSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip DSTEBZ/DSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. +* Also call DSTEBZ and DSTEIN if DSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if DSTEMR/DSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if DSTEMR/DSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVR_2STAGE +* + END diff --git a/SRC/dsyevx_2stage.f b/SRC/dsyevx_2stage.f new file mode 100644 index 00000000..2c52e9e3 --- /dev/null +++ b/SRC/dsyevx_2stage.f @@ -0,0 +1,608 @@ +*> \brief <b> DSYEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 3*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 3*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA, + $ DSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDHOUS = INDD + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or DORGTR and SSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL DORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSYEVX_2STAGE +* + END diff --git a/SRC/dsygv_2stage.f b/SRC/dsygv_2stage.f new file mode 100644 index 00000000..2c79ec8a --- /dev/null +++ b/SRC/dsygv_2stage.f @@ -0,0 +1,370 @@ +*> \brief \b DSYGV_2STAGE +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygv_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygv_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygv_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +* sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEV returned an error code: +*> <= N: if INFO = i, DSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYGST, DTRMM, DTRSM, XERBLA, + $ DSYEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYGV_2STAGE +* + END diff --git a/SRC/dsytrd_2stage.f b/SRC/dsytrd_2stage.f new file mode 100644 index 00000000..449a279e --- /dev/null +++ b/SRC/dsytrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b DSYTRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> d, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* DOUBLE PRECISION A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD_2STAGE reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q1**T Q2**T* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is DOUBLE PRECISION array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRD_SY2SB, DSYTRD_SB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV( 17, 'DSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'DSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL DSYTRD_SY2SB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SY2SB', -INFO ) + RETURN + END IF + CALL DSYTRD_SB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_2STAGE +* + END diff --git a/SRC/dsytrd_sb2st.F b/SRC/dsytrd_sb2st.F new file mode 100644 index 00000000..d50debe1 --- /dev/null +++ b/SRC/dsytrd_sb2st.F @@ -0,0 +1,603 @@ +*> \brief \b DSBTRD +* +* @generated from zhetrd_hb2st.F, fortran z -> d, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBTRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsbtrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsbtrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsbtrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #define PRECISION_REAL +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBTRD reduces a real symmetric band matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the dsytrd_sy2sb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the dsytrd_sy2sb +*> routine has been called to produce AB (e.g., AB is +*> the output of dsytrd_sy2sb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is DOUBLE PRECISION array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup real16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#define PRECISION_REAL +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RZERO + DOUBLE PRECISION ZERO, ONE + PARAMETER ( RZERO = 0.0D+0, + $ ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN +#if defined (PRECISION_COMPLEX) + DOUBLE PRECISION ABSTMP + DOUBLE PRECISION TMP +#endif +* .. +* .. External Subroutines .. + EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'DSYTRD_SB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SIDEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* real because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE + GOTO 200 + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* +C IF( KD.EQ.1 .AND. N.GT.(KD+1) .AND. AFTERS1 ) THEN + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 50 CONTINUE +#if defined (PRECISION_COMPLEX) +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL DSCAL( N, ( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL DSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +#else + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +#endif + GOTO 200 + END IF +* +* Main code start here. +* Reduce the symmetric band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL DLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL DLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) GOTO 100 + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + GOTO 130 + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = ( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + 200 CONTINUE +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_SB2ST +* + END +#undef PRECISION_REAL + diff --git a/SRC/dsytrd_sy2sb.f b/SRC/dsytrd_sy2sb.f new file mode 100644 index 00000000..8f0261df --- /dev/null +++ b/SRC/dsytrd_sy2sb.f @@ -0,0 +1,517 @@ +*> \brief \b DSYTRD_SY2SB +* +* @generated from zhetrd_he2hb.f, fortran z -> d, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD_SY2SB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric +*> band-diagonal form AB by a orthogonal similarity transformation: +*> Q**T * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +* A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RONE + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( RONE = 1.0D+0, + $ ZERO = 0.0D+0, + $ ONE = 1.0D+0, + $ HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYR2K, DSYMM, DGEMM, + $ DLARFT, DGELQF, DGEQRF, DLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'DSYTRD_SY2SB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SY2SB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL DCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL DCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL DLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL DGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL DLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL DLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL DGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL DSYMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL DGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL DGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL DSYR2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL DGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL DLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL DLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL DSYMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL DGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL DSYR2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL DCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_SY2SB +* + END diff --git a/SRC/ilaenv.f b/SRC/ilaenv.f index 42a380cf..c66f1679 100644 --- a/SRC/ilaenv.f +++ b/SRC/ilaenv.f @@ -189,7 +189,8 @@ * .. Executable Statements .. * GO TO ( 10, 10, 10, 80, 90, 100, 110, 120, - $ 130, 140, 150, 160, 160, 160, 160, 160 )ISPEC + $ 130, 140, 150, 160, 160, 160, 160, 160, + $ 170, 170, 170, 170, 170 )ISPEC * * Invalid value for ISPEC * @@ -690,6 +691,13 @@ ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) RETURN * + 170 CONTINUE +* +* 17 <= ISPEC <= 21: 2stage eigenvalues and SVD or related subroutines. +* + ILAENV = IPARAM2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) + RETURN +* * End of ILAENV * END diff --git a/SRC/iparam2stage.F b/SRC/iparam2stage.F new file mode 100644 index 00000000..6443f16e --- /dev/null +++ b/SRC/iparam2stage.F @@ -0,0 +1,388 @@ +*> \brief \b IPARAM2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IPARAM2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, +* NI, NBI, IBI, NXI ) +* #if defined(_OPENMP) +* use omp_lib +* #endif +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER*( * ) NAME, OPTS +* INTEGER ISPEC, NI, NBI, IBI, NXI +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This program sets problem and machine dependent parameters +*> useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST, +*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD +*> and related subroutines for eigenvalue problems. +*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is integer scalar +*> ISPEC specifies which tunable parameter IPARAM2STAGE should +*> return. +*> +*> ISPEC=17: the optimal blocksize nb for the reduction to +* BAND +*> +*> ISPEC=18: the optimal blocksize ib for the eigenvectors +*> singular vectors update routine +*> +*> ISPEC=19: The length of the array that store the Housholder +*> representation for the second stage +*> Band to Tridiagonal or Bidiagonal +*> +*> ISPEC=20: The workspace needed for the routine in input. +*> +*> ISPEC=21: For future release. +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is character string +*> Name of the calling subroutine +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER*(*) +*> The character options to the subroutine NAME, concatenated +*> into a single character string. For example, UPLO = 'U', +*> TRANS = 'T', and DIAG = 'N' for a triangular routine would +*> be specified as OPTS = 'UTN'. +*> \endverbatim +*> +*> \param[in] NI +*> \verbatim +*> NI is INTEGER which is the size of the matrix +*> \endverbatim +*> +*> \param[in] NBI +*> \verbatim +*> NBI is INTEGER which is the used in the reduciton, +* (e.g., the size of the band), needed to compute workspace +* and LHOUS2. +*> \endverbatim +*> +*> \param[in] IBI +*> \verbatim +*> IBI is INTEGER which represent the IB of the reduciton, +* needed to compute workspace and LHOUS2. +*> \endverbatim +*> +*> \param[in] NXI +*> \verbatim +*> NXI is INTEGER needed in the future release. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All detail are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, + $ NI, NBI, IBI, NXI ) +#if defined(_OPENMP) + use omp_lib +#endif + IMPLICIT NONE +* +* -- LAPACK auxiliary routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, NI, NBI, IBI, NXI +* +* ================================================================ +* .. +* .. Local Scalars .. + INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS, + $ FACTOPTNB, QROPTNB, LQOPTNB + LOGICAL RPREC, CPREC + CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*3 +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, MAX +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Invalid value for ISPEC +* + IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN + IPARAM2STAGE = -1 + RETURN + ENDIF +* +* Get the number of threads +* + NTHREADS = 1 +#if defined(_OPENMP) +!$OMP PARALLEL + NTHREADS = OMP_GET_NUM_THREADS() +!$OMP END PARALLEL +#endif +* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC + IF( ISPEC.EQ.19 ) GOTO 19 +* +* Convert NAME to upper case if the first character is lower case. +* + IPARAM2STAGE = -1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 100 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 100 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO 110 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + 110 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 120 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 120 CONTINUE + END IF + END IF +* + PREC = SUBNAM( 1: 1 ) + ALGO = SUBNAM( 4: 6 ) + STAG = SUBNAM( 8:12 ) + RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D' + CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z' +* +* Invalid value for PRECISION +* + IF( .NOT.( RPREC .OR. CPREC ) ) THEN + IPARAM2STAGE = -1 + RETURN + ENDIF +* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC, +* $ ' ALGO ',ALGO,' STAGE ',STAG +* + GO TO ( 17, 17, 19, 20, 21 ) ISPEC-16 +* + 17 CONTINUE +* +* ISPEC = 17, 18: block size KD, IB +* Could be also dependent from N but for now it +* depend only on sequential or parallel +* + IF( NTHREADS.GT.4 ) THEN + IF( CPREC ) THEN + KD = 128 + IB = 32 + ELSE + KD = 160 + IB = 40 + ENDIF + ELSE IF( NTHREADS.GT.1 ) THEN + IF( CPREC ) THEN + KD = 64 + IB = 32 + ELSE + KD = 64 + IB = 32 + ENDIF + ELSE + IF( CPREC ) THEN + KD = 16 + IB = 16 + ELSE + KD = 32 + IB = 16 + ENDIF + ENDIF + IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD + IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB + RETURN +* + 19 CONTINUE +* +* ISPEC = 19: +* LHOUS length of the Houselholder representation +* matrix (V,T) of the second stage. should be >= 1. +* +* Will add the VECT OPTION HERE next release + VECT = OPTS(1:1) + IF( VECT.EQ.'N' ) THEN + LHOUS = MAX( 1, 4*NI ) + ELSE +* This is not correct, it need to call the ALGO and the stage2 + LHOUS = MAX( 1, 4*NI ) + IBI + ENDIF + IF( LHOUS.GE.0 ) THEN + IPARAM2STAGE = LHOUS + ELSE + IPARAM2STAGE = -1 + ENDIF + RETURN +* + 20 CONTINUE +* +* ISPEC = 20: (21 for future use) +* LWORK length of the workspace for +* either or both stages for TRD and BRD. should be >= 1. +* TRD: +* TRD_stage 1: = LT + LW + LS1 + LS2 +* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD +* where LDT=LDS2=KD +* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS +* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N ) +* = N*KD + N*max(KD+1,FACTOPTNB) +* + max(2*KD*KD, KD*NTHREADS) +* + (KD+1)*N + LWORK = -1 + SUBNAM(1:1) = PREC + SUBNAM(2:6) = 'GEQRF' + QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 ) + SUBNAM(2:6) = 'GELQF' + LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 ) +* Could be QR or LQ for TRD and the max for BRD + FACTOPTNB = MAX(QROPTNB, LQOPTNB) + IF( ALGO.EQ.'TRD' ) THEN + IF( STAG.EQ.'2STAG' ) THEN + LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) + $ + MAX(2*NBI*NBI, NBI*NTHREADS) + $ + (NBI+1)*NI + ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN + LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI + ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN + LWORK = (2*NBI+1)*NI + NBI*NTHREADS + ENDIF + ELSE IF( ALGO.EQ.'BRD' ) THEN + IF( STAG.EQ.'2STAG' ) THEN + LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) + $ + MAX(2*NBI*NBI, NBI*NTHREADS) + $ + (NBI+1)*NI + ELSE IF( STAG.EQ.'GE2GB' ) THEN + LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI + ELSE IF( STAG.EQ.'GB2BD' ) THEN + LWORK = (3*NBI+1)*NI + NBI*NTHREADS + ENDIF + ENDIF + LWORK = MAX ( 1, LWORK ) + + IF( LWORK.GT.0 ) THEN + IPARAM2STAGE = LWORK + ELSE + IPARAM2STAGE = -1 + ENDIF + RETURN +* + 21 CONTINUE +* +* ISPEC = 21 for future use + IPARAM2STAGE = NXI + RETURN +* +* ==== End of IPARAM2STAGE ==== +* + END diff --git a/SRC/slarfy.f b/SRC/slarfy.f new file mode 100644 index 00000000..19a7fa6d --- /dev/null +++ b/SRC/slarfy.f @@ -0,0 +1,161 @@ +*> \brief \b SLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n symmetric matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup single_eig +* +* ===================================================================== + SUBROUTINE SLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSYMV, SSYR2 +* .. +* .. External Functions .. + REAL SDOT + EXTERNAL SDOT +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL SSYMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*SDOT( N, WORK, 1, V, INCV ) + CALL SAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL SSYR2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of SLARFY +* + END diff --git a/SRC/ssb2st_kernels.f b/SRC/ssb2st_kernels.f new file mode 100644 index 00000000..60058dda --- /dev/null +++ b/SRC/ssb2st_kernels.f @@ -0,0 +1,320 @@ +*> \brief \b SSB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSB2ST_KERNELS + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> @param[in] n +*> The order of the matrix A. +*> +*> @param[in] nb +*> The size of the band. +*> +*> @param[in, out] A +*> A pointer to the matrix A. +*> +*> @param[in] lda +*> The leading dimension of the matrix A. +*> +*> @param[out] V +*> REAL array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> +*> @param[out] TAU +*> REAL array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> +*> @param[in] st +*> internal parameter for indices. +*> +*> @param[in] ed +*> internal parameter for indices. +*> +*> @param[in] sweep +*> internal parameter for indices. +*> +*> @param[in] Vblksiz +*> internal parameter for indices. +*> +*> @param[in] wantz +*> logical which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> +*> @param[in] work +*> Workspace of size nb. +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + REAL A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + REAL CTMP +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SLARFX, SLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 101, 102, 103 ) TTYPE +* + 101 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = ( A( OFDPOS, ST ) ) + CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + 103 CONTINUE + LM = ED - ST + 1 + CALL SLARFY( UPLO, LM, V( VPOS ), 1, ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + GOTO 300 +* + 102 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL SLARFX( 'Left', LN, LM, V( VPOS ), + $ ( TAU( TAUPOS ) ), A( DPOS-NB, J1 ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = ( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = ( A( DPOS-NB, J1 ) ) + CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL SLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + GOTO 300 +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 201, 202, 203 ) TTYPE +* + 201 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL SLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + 203 CONTINUE + LM = ED - ST + 1 +* + CALL SLARFY( UPLO, LM, V( VPOS ), 1, ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + GOTO 300 +* + 202 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL SLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL SLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL SLARFX( 'Left', LM, LN-1, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + GOTO 300 + ENDIF + + 300 CONTINUE + RETURN +* +* END OF SSB2ST_KERNELS +* + END diff --git a/SRC/ssbev_2stage.f b/SRC/ssbev_2stage.f new file mode 100644 index 00000000..821c00a3 --- /dev/null +++ b/SRC/ssbev_2stage.f @@ -0,0 +1,377 @@ +*> \brief <b> SSBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from dsbev_2stage.f, fortran d -> s, Sat Nov 5 23:58:09 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssbev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssbev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssbev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SSCAL, SSTEQR, SSTERF, XERBLA + $ SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSBEV_2STAGE +* + END diff --git a/SRC/ssbevd_2stage.f b/SRC/ssbevd_2stage.f new file mode 100644 index 00000000..8a306306 --- /dev/null +++ b/SRC/ssbevd_2stage.f @@ -0,0 +1,412 @@ +*> \brief <b> SSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from dsbevd_2stage.f, fortran d -> s, Sat Nov 5 23:58:03 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssbevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssbevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssbevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ LLWRK2 + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLASCL, SSCAL, SSTEDC, + $ SSTERF, XERBLA, SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = MAX( 2*N, N+LHTRD+LWTRD ) + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce band symmetric matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSBEVD_2STAGE +* + END diff --git a/SRC/ssbevx_2stage.f b/SRC/ssbevx_2stage.f new file mode 100644 index 00000000..d3a588c4 --- /dev/null +++ b/SRC/ssbevx_2stage.f @@ -0,0 +1,633 @@ +*> \brief <b> SSBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @generated from dsbevx_2stage.f, fortran d -> s, Sat Nov 5 23:58:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssbevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssbevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssbevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, +* LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 7*N, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + 2*N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, + $ LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSCAL, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA, + $ SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVX_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSBEVX_2STAGE +* + END diff --git a/SRC/ssyev_2stage.f b/SRC/ssyev_2stage.f new file mode 100644 index 00000000..52f11c35 --- /dev/null +++ b/SRC/ssyev_2stage.f @@ -0,0 +1,348 @@ +*> \brief <b> SSYEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @generated from dsyev_2stage.f, fortran d -> s, Sat Nov 5 23:55:51 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, + $ XERBLA, SSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SORGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSYEV_2STAGE +* + END diff --git a/SRC/ssyevd_2stage.f b/SRC/ssyevd_2stage.f new file mode 100644 index 00000000..8510b645 --- /dev/null +++ b/SRC/ssyevd_2stage.f @@ -0,0 +1,406 @@ +*> \brief <b> SSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @generated from dsyevd_2stage.f, fortran d -> s, Sat Nov 5 23:55:54 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssyevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLWORK, LLWRK2, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, + $ SSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + LHTRD + LWTRD + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVD_2STAGE +* + END diff --git a/SRC/ssyevr_2stage.f b/SRC/ssyevr_2stage.f new file mode 100644 index 00000000..27b99303 --- /dev/null +++ b/SRC/ssyevr_2stage.f @@ -0,0 +1,745 @@ +*> \brief <b> SSYEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @generated from dsyevr_2stage.f, fortran d -> s, Sat Nov 5 23:50:10 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVR_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssyevr_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssyevr_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssyevr_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> SSYEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to SSYTRD. Then, whenever possible, SSYEVR_2STAGE calls SSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. SSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see SSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : SSYEVR_2STAGE calls SSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> SSYEVR_2STAGE calls SSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of SSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> SSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of SSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by SORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 5*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 5*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ, + $ TRYRAC, TEST + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWMIN, NSPLIT, + $ LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, + $ SSTERF, SSWAP, SSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN +* NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) +* NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) +* LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 26 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or SSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in SSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from SSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by SSTEMR (the SSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and SSTEMR. + INDEE = INDDD + N +* INDHOUS is the starting offset Householder storage of stage 2 + INDHOUS = INDEE + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* SSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or SSTEMR and SORMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL SSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip SSTEBZ/SSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* Also call SSTEBZ and SSTEIN if SSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if SSTEMR/SSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if SSTEMR/SSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVR_2STAGE +* + END diff --git a/SRC/ssyevx_2stage.f b/SRC/ssyevx_2stage.f new file mode 100644 index 00000000..96a73ecd --- /dev/null +++ b/SRC/ssyevx_2stage.f @@ -0,0 +1,608 @@ +*> \brief <b> SSYEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* @generated from dsyevx_2stage.f, fortran d -> s, Sat Nov 5 23:55:46 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssyevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssyevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssyevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 3*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 3*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA, + $ SSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDHOUS = INDD + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or SORGTR and SSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL SORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSYEVX_2STAGE +* + END diff --git a/SRC/ssygv_2stage.f b/SRC/ssygv_2stage.f new file mode 100644 index 00000000..6eb172e9 --- /dev/null +++ b/SRC/ssygv_2stage.f @@ -0,0 +1,371 @@ +*> \brief \b SSYGV_2STAGE +* +* @generated from dsygv_2stage.f, fortran d -> s, Sun Nov 6 12:54:29 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssygv_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssygv_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssygv_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +* sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPOTRF or SSYEV returned an error code: +*> <= N: if INFO = i, SSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYGST, STRMM, STRSM, XERBLA, + $ SSYEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYGV_2STAGE +* + END diff --git a/SRC/ssytrd_2stage.f b/SRC/ssytrd_2stage.f new file mode 100644 index 00000000..fba3dd45 --- /dev/null +++ b/SRC/ssytrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b SSYTRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytrd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytrd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytrd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* REAL A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_2STAGE reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q1**T Q2**T* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is REAL array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + REAL A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRD_SY2SB, SSYTRD_SB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV( 17, 'SSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV( 18, 'SSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'SSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL SSYTRD_SY2SB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) + RETURN + END IF + CALL SSYTRD_SB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_2STAGE +* + END diff --git a/SRC/ssytrd_sb2st.F b/SRC/ssytrd_sb2st.F new file mode 100644 index 00000000..edbcf125 --- /dev/null +++ b/SRC/ssytrd_sb2st.F @@ -0,0 +1,603 @@ +*> \brief \b SSBTRD +* +* @generated from zhetrd_hb2st.F, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBTRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssbtrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssbtrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssbtrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #define PRECISION_REAL +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* REAL AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBTRD reduces a real symmetric band matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the ssytrd_sy2sb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the ssytrd_sy2sb +*> routine has been called to produce AB (e.g., AB is +*> the output of ssytrd_sy2sb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is REAL array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup real16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#define PRECISION_REAL +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + REAL AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RZERO + REAL ZERO, ONE + PARAMETER ( RZERO = 0.0E+0, + $ ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SISEV, SIZETAU, LDV, LHMIN, LWMIN +#if defined (PRECISION_COMPLEX) + REAL ABSTMP + REAL TMP +#endif +* .. +* .. External Subroutines .. + EXTERNAL SSB2ST_KERNELS, SLACPY, SLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'SSYTRD_SB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'SSYTRD_SB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'SSYTRD_SB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SISEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* real because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE + GOTO 200 + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* +C IF( KD.EQ.1 .AND. N.GT.(KD+1) .AND. AFTERS1 ) THEN + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 50 CONTINUE +#if defined (PRECISION_COMPLEX) +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL SSCAL( N, ( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL SSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +#else + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +#endif + GOTO 200 + END IF +* +* Main code start here. +* Reduce the symmetric band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL SLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL SLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) GOTO 100 + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + GOTO 130 + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = ( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + 200 CONTINUE +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_SB2ST +* + END +#undef PRECISION_REAL + diff --git a/SRC/ssytrd_sy2sb.f b/SRC/ssytrd_sy2sb.f new file mode 100644 index 00000000..3dbbaf1f --- /dev/null +++ b/SRC/ssytrd_sy2sb.f @@ -0,0 +1,517 @@ +*> \brief \b SSYTRD_SY2SB +* +* @generated from zhetrd_he2hb.f, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_SY2SB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssytrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssytrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssytrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric +*> band-diagonal form AB by a orthogonal similarity transformation: +*> Q**T * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK. +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +* A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RONE + REAL ZERO, ONE, HALF + PARAMETER ( RONE = 1.0E+0, + $ ZERO = 0.0E+0, + $ ONE = 1.0E+0, + $ HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYR2K, SSYMM, SGEMM, + $ SLARFT, SGELQF, SGEQRF, SLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'SSYTRD_SY2SB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL SCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL SCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL SLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL SGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL SLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL SSYR2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL SGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL SLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL SCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_SY2SB +* + END diff --git a/SRC/zhb2st_kernels.f b/SRC/zhb2st_kernels.f new file mode 100644 index 00000000..ab03b303 --- /dev/null +++ b/SRC/zhb2st_kernels.f @@ -0,0 +1,320 @@ +*> \brief \b ZHB2ST_KERNELS +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHB2ST_KERNELS + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> @param[in] n +*> The order of the matrix A. +*> +*> @param[in] nb +*> The size of the band. +*> +*> @param[in, out] A +*> A pointer to the matrix A. +*> +*> @param[in] lda +*> The leading dimension of the matrix A. +*> +*> @param[out] V +*> COMPLEX*16 array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> +*> @param[out] TAU +*> COMPLEX*16 array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> +*> @param[in] st +*> internal parameter for indices. +*> +*> @param[in] ed +*> internal parameter for indices. +*> +*> @param[in] sweep +*> internal parameter for indices. +*> +*> @param[in] Vblksiz +*> internal parameter for indices. +*> +*> @param[in] wantz +*> logical which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> +*> @param[in] work +*> Workspace of size nb. +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + COMPLEX*16 CTMP +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZLARFX, ZLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 101, 102, 103 ) TTYPE +* + 101 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = DCONJG( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = DCONJG( A( OFDPOS, ST ) ) + CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + 103 CONTINUE + LM = ED - ST + 1 + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + GOTO 300 +* + 102 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL ZLARFX( 'Left', LN, LM, V( VPOS ), + $ DCONJG( TAU( TAUPOS ) ), A( DPOS-NB, J1 ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = DCONJG( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = DCONJG( A( DPOS-NB, J1 ) ) + CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + GOTO 300 +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF + GO TO ( 201, 202, 203 ) TTYPE +* + 201 CONTINUE + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + 203 CONTINUE + LM = ED - ST + 1 +* + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + GOTO 300 +* + 202 CONTINUE + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL ZLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ), + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + GOTO 300 + ENDIF + + 300 CONTINUE + RETURN +* +* END OF ZHB2ST_KERNELS +* + END diff --git a/SRC/zhbev_2stage.f b/SRC/zhbev_2stage.f new file mode 100644 index 00000000..f1088b87 --- /dev/null +++ b/SRC/zhbev_2stage.f @@ -0,0 +1,386 @@ +*> \brief <b> ZHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, INDRWK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR + $ ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = DBLE( AB( 1, 1 ) ) + ELSE + W( 1 ) = DBLE( AB( KD+1, 1 ) ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHBEV_2STAGE +* + END diff --git a/SRC/zhbevd_2stage.f b/SRC/zhbevd_2stage.f new file mode 100644 index 00000000..e4daae74 --- /dev/null +++ b/SRC/zhbevd_2stage.f @@ -0,0 +1,458 @@ +*> \brief <b> ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE, + $ LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY, + $ ZLASCL, ZSTEDC, ZHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = MAX( N, LHTRD + LWTRD ) + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( AB( 1, 1 ) ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDHOUS = 1 + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + INDWK2 = INDWK + N*N + LLWK2 = LWORK - INDWK2 + 1 +* + CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBEVD_2STAGE +* + END diff --git a/SRC/zhbevx_2stage.f b/SRC/zhbevx_2stage.f new file mode 100644 index 00000000..3efdcc74 --- /dev/null +++ b/SRC/zhbevx_2stage.f @@ -0,0 +1,646 @@ +*> \brief <b> ZHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, +* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, +* Z, LDZ, WORK, LWORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, + $ Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, WORK, LWORK, RWORK, IWORK, + $ IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX*16 CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY, + $ ZGEMV, ZLACPY, ZLASCL, ZSTEIN, ZSTEQR, + $ ZSWAP, ZHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV( 18, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = DBLE( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = DBLE( CTMP1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N +* + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_HB2ST( 'N', JOBZ, UPLO, N, KD, AB, LDAB, + $ RWORK( INDD ), RWORK( INDE ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHBEVX_2STAGE +* + END diff --git a/SRC/zheev_2stage.f b/SRC/zheev_2stage.f new file mode 100644 index 00000000..5aca4da2 --- /dev/null +++ b/SRC/zheev_2stage.f @@ -0,0 +1,355 @@ +*> \brief <b> ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR, + $ ZUNGTR, ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( A( 1, 1 ) ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUNGTR to generate the unitary matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEEV_2STAGE +* + END diff --git a/SRC/zheevd_2stage.f b/SRC/zheevd_2stage.f new file mode 100644 index 00000000..79a0e886 --- /dev/null +++ b/SRC/zheevd_2stage.f @@ -0,0 +1,451 @@ +*> \brief <b> ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LRWMIN, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + + + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL, + $ ZSTEDC, ZUNMTR, ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LHTRD + LWTRD + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( A( 1, 1 ) ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call ZUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVD_2STAGE +* + END diff --git a/SRC/zheevr_2stage.f b/SRC/zheevr_2stage.f new file mode 100644 index 00000000..bfd43056 --- /dev/null +++ b/SRC/zheevr_2stage.f @@ -0,0 +1,779 @@ +*> \brief <b> ZHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVR_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevr_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevr_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevr_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> ZHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to ZHETRD. Then, whenever possible, ZHEEVR_2STAGE calls ZSTEMR to compute +*> eigenspectrum using Relatively Robust Representations. ZSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : ZHEEVR_2STAGE calls ZSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> ZHEEVR_2STAGE calls DSTEBZ and ZSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of ZSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> ZSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of ZSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by ZUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWMIN, NSPLIT, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD_2STAGE, ZSTEMR, ZSTEIN, ZSWAP, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV( 17, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or ZSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in ZHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDHOUS = INDTAU + N + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from ZHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by ZSTEMR (the DSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and ZSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* ZSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDRD ), + $ RWORK( INDRE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or ZSTEMR and ZUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL ZSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* Also call DSTEBZ and ZSTEIN if ZSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVR_2STAGE +* + END diff --git a/SRC/zheevx_2stage.f b/SRC/zheevx_2stage.f new file mode 100644 index 00000000..e33d55e0 --- /dev/null +++ b/SRC/zheevx_2stage.f @@ -0,0 +1,618 @@ +*> \brief <b> ZHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVX_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheevx_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheevx_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheevx_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZLACPY, ZSTEIN, ZSTEQR, ZSWAP, ZUNGTR, ZUNMTR, + $ ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDD ), + $ RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, WORK( INDWRK ), + $ LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL ZUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEEVX_2STAGE +* + END diff --git a/SRC/zhegv_2stage.f b/SRC/zhegv_2stage.f new file mode 100644 index 00000000..5079d240 --- /dev/null +++ b/SRC/zhegv_2stage.f @@ -0,0 +1,379 @@ +*> \brief \b ZHEGV_2STAGE +* +* @precisions fortran z -> c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGV_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +* sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPOTRF or ZHEEV returned an error code: +*> <= N: if INFO = i, ZHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM, + $ ZHEEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV( 17, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV( 18, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV( 19, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV( 20, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, + $ WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEGV_2STAGE +* + END diff --git a/SRC/zhetrd_2stage.f b/SRC/zhetrd_2stage.f new file mode 100644 index 00000000..62fd7539 --- /dev/null +++ b/SRC/zhetrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b ZHETRD_2STAGE +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD_2STAGE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_2stage.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_2stage.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_2stage.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q1**H Q2**H* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the unitary +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the unitary matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is COMPLEX*16 array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRD_HE2HB, ZHETRD_HB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV( 17, 'ZHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV( 18, 'ZHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'ZHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL ZHETRD_HE2HB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HE2HB', -INFO ) + RETURN + END IF + CALL ZHETRD_HB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_2STAGE +* + END diff --git a/SRC/zhetrd_hb2st.F b/SRC/zhetrd_hb2st.F new file mode 100644 index 00000000..5d62e30d --- /dev/null +++ b/SRC/zhetrd_hb2st.F @@ -0,0 +1,603 @@ +*> \brief \b ZHBTRD +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBTRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbtrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbtrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbtrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #define PRECISION_COMPLEX +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBTRD reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the zhetrd_he2hb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the zhetrd_he2hb +*> routine has been called to produce AB (e.g., AB is +*> the output of zhetrd_he2hb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is COMPLEX*16 array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#define PRECISION_COMPLEX +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RZERO + COMPLEX*16 ZERO, ONE + PARAMETER ( RZERO = 0.0D+0, + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SIZEV, SIZETAU, LDV, LHMIN, LWMIN +#if defined (PRECISION_COMPLEX) + DOUBLE PRECISION ABSTMP + COMPLEX*16 TMP +#endif +* .. +* .. External Subroutines .. + EXTERNAL ZHB2ST_KERNELS, ZLACPY, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, DBLE, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'ZHETRD_HB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SIZEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* complex because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = DBLE( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE + GOTO 200 + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* +C IF( KD.EQ.1 .AND. N.GT.(KD+1) .AND. AFTERS1 ) THEN + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = DBLE( AB( ABDPOS, I ) ) + 50 CONTINUE +#if defined (PRECISION_COMPLEX) +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +#else + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = DBLE( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = DBLE( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +#endif + GOTO 200 + END IF +* +* Main code start here. +* Reduce the hermitian band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL ZLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) GOTO 100 + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + GOTO 130 + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = DBLE( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + 200 CONTINUE +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_HB2ST +* + END +#undef PRECISION_COMPLEX + diff --git a/SRC/zhetrd_he2hb.f b/SRC/zhetrd_he2hb.f new file mode 100644 index 00000000..9403b73e --- /dev/null +++ b/SRC/zhetrd_he2hb.f @@ -0,0 +1,517 @@ +*> \brief \b ZHETRD_HE2HB +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD_HE2HB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian +*> band-diagonal form AB by a unitary similarity transformation: +*> Q**H * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK. +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +* by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +* A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RONE + COMPLEX*16 ZERO, ONE, HALF + PARAMETER ( RONE = 1.0D+0, + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, + $ ZLARFT, ZGELQF, ZGEQRF, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'ZHETRD_HE2HB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HE2HB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL ZCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL ZGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL ZLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL ZLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL ZHEMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL ZHER2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL ZLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL ZLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL ZHEMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL ZHER2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_HE2HB +* + END diff --git a/SRC/zlarfy.f b/SRC/zlarfy.f new file mode 100644 index 00000000..39b795f0 --- /dev/null +++ b/SRC/zlarfy.f @@ -0,0 +1,163 @@ +*> \brief \b ZLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n Hermitian matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZHEMV, ZHER2 +* .. +* .. External Functions .. + COMPLEX*16 ZDOTC + EXTERNAL ZDOTC +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL ZHEMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*ZDOTC( N, WORK, 1, V, INCV ) + CALL ZAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL ZHER2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of ZLARFY +* + END diff --git a/TESTING/EIG/Makefile b/TESTING/EIG/Makefile index 6811cc2c..413e2359 100644 --- a/TESTING/EIG/Makefile +++ b/TESTING/EIG/Makefile @@ -51,11 +51,11 @@ SCIGTST = slafts.o slahd2.o slasum.o slatb9.o sstech.o sstect.o \ SEIGTST = schkee.o \ sbdt01.o sbdt02.o sbdt03.o sbdt04.o sbdt05.o\ schkbb.o schkbd.o schkbk.o schkbl.o schkec.o \ - schkgg.o schkgk.o schkgl.o schkhs.o schksb.o schkst.o \ + schkgg.o schkgk.o schkgl.o schkhs.o schksb.o schkst.o schkst2stg.o schksb2stg.o \ sckcsd.o sckglm.o sckgqr.o sckgsv.o scklse.o scsdts.o \ sdrges.o sdrgev.o sdrges3.o sdrgev3.o sdrgsx.o sdrgvx.o \ - sdrvbd.o sdrves.o sdrvev.o sdrvsg.o \ - sdrvst.o sdrvsx.o sdrvvx.o \ + sdrvbd.o sdrves.o sdrvev.o sdrvsg.o sdrvsg2stg.o \ + sdrvst.o sdrvst2stg.o sdrvsx.o sdrvvx.o \ serrbd.o serrec.o serred.o serrgg.o serrhs.o serrst.o \ sget02.o sget10.o sget22.o sget23.o sget24.o sget31.o \ sget32.o sget33.o sget34.o sget35.o sget36.o \ @@ -68,11 +68,11 @@ SEIGTST = schkee.o \ CEIGTST = cchkee.o \ cbdt01.o cbdt02.o cbdt03.o cbdt05.o\ cchkbb.o cchkbd.o cchkbk.o cchkbl.o cchkec.o \ - cchkgg.o cchkgk.o cchkgl.o cchkhb.o cchkhs.o cchkst.o \ + cchkgg.o cchkgk.o cchkgl.o cchkhb.o cchkhs.o cchkst.o cchkst2stg.o cchkhb2stg.o \ cckcsd.o cckglm.o cckgqr.o cckgsv.o ccklse.o ccsdts.o \ cdrges.o cdrgev.o cdrges3.o cdrgev3.o cdrgsx.o cdrgvx.o \ - cdrvbd.o cdrves.o cdrvev.o cdrvsg.o \ - cdrvst.o cdrvsx.o cdrvvx.o \ + cdrvbd.o cdrves.o cdrvev.o cdrvsg.o cdrvsg2stg.o \ + cdrvst.o cdrvst2stg.o cdrvsx.o cdrvvx.o \ cerrbd.o cerrec.o cerred.o cerrgg.o cerrhs.o cerrst.o \ cget02.o cget10.o cget22.o cget23.o cget24.o \ cget35.o cget36.o cget37.o cget38.o cget51.o cget52.o \ @@ -88,11 +88,11 @@ DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \ DEIGTST = dchkee.o \ dbdt01.o dbdt02.o dbdt03.o dbdt04.o dbdt05.o\ dchkbb.o dchkbd.o dchkbk.o dchkbl.o dchkec.o \ - dchkgg.o dchkgk.o dchkgl.o dchkhs.o dchksb.o dchkst.o \ + dchkgg.o dchkgk.o dchkgl.o dchkhs.o dchksb.o dchkst.o dchkst2stg.o dchksb2stg.o \ dckcsd.o dckglm.o dckgqr.o dckgsv.o dcklse.o dcsdts.o \ ddrges.o ddrgev.o ddrges3.o ddrgev3.o ddrgsx.o ddrgvx.o \ - ddrvbd.o ddrves.o ddrvev.o ddrvsg.o \ - ddrvst.o ddrvsx.o ddrvvx.o \ + ddrvbd.o ddrves.o ddrvev.o ddrvsg.o ddrvsg2stg.o \ + ddrvst.o ddrvst2stg.o ddrvsx.o ddrvvx.o \ derrbd.o derrec.o derred.o derrgg.o derrhs.o derrst.o \ dget02.o dget10.o dget22.o dget23.o dget24.o dget31.o \ dget32.o dget33.o dget34.o dget35.o dget36.o \ @@ -105,11 +105,11 @@ DEIGTST = dchkee.o \ ZEIGTST = zchkee.o \ zbdt01.o zbdt02.o zbdt03.o zbdt05.o\ zchkbb.o zchkbd.o zchkbk.o zchkbl.o zchkec.o \ - zchkgg.o zchkgk.o zchkgl.o zchkhb.o zchkhs.o zchkst.o \ + zchkgg.o zchkgk.o zchkgl.o zchkhb.o zchkhs.o zchkst.o zchkst2stg.o zchkhb2stg.o \ zckcsd.o zckglm.o zckgqr.o zckgsv.o zcklse.o zcsdts.o \ zdrges.o zdrgev.o zdrges3.o zdrgev3.o zdrgsx.o zdrgvx.o \ - zdrvbd.o zdrves.o zdrvev.o zdrvsg.o \ - zdrvst.o zdrvsx.o zdrvvx.o \ + zdrvbd.o zdrves.o zdrvev.o zdrvsg.o zdrvsg2stg.o \ + zdrvst.o zdrvst2stg.o zdrvsx.o zdrvvx.o \ zerrbd.o zerrec.o zerred.o zerrgg.o zerrhs.o zerrst.o \ zget02.o zget10.o zget22.o zget23.o zget24.o \ zget35.o zget36.o zget37.o zget38.o zget51.o zget52.o \ diff --git a/TESTING/EIG/cchkee.f b/TESTING/EIG/cchkee.f index d5f3f729..2fd530f6 100644 --- a/TESTING/EIG/cchkee.f +++ b/TESTING/EIG/cchkee.f @@ -1102,7 +1102,8 @@ $ CDRGEV, CDRGSX, CDRGVX, CDRVBD, CDRVES, CDRVEV, $ CDRVSG, CDRVST, CDRVSX, CDRVVX, CERRBD, $ CERRED, CERRGG, CERRHS, CERRST, ILAVER, XLAENV, - $ CDRGES3, CDRGEV3 + $ CDRGES3, CDRGEV3, + $ CCHKST2STG, CDRVST2STG, CCHKHB2STG * .. * .. Intrinsic Functions .. INTRINSIC LEN, MIN @@ -1149,7 +1150,7 @@ PATH = LINE( 1: 3 ) NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'CHS' ) SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'CST' ) .OR. - $ LSAMEN( 3, PATH, 'CSG' ) + $ LSAMEN( 3, PATH, 'CSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'CBD' ) CEV = LSAMEN( 3, PATH, 'CEV' ) CES = LSAMEN( 3, PATH, 'CES' ) @@ -1829,7 +1830,8 @@ $ WRITE( NOUT, FMT = 9980 )'CCHKHS', INFO 270 CONTINUE * - ELSE IF( LSAMEN( 3, C3, 'CST' ) .OR. LSAMEN( 3, C3, 'SEP' ) ) THEN + ELSE IF( LSAMEN( 3, C3, 'CST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. LSAMEN( 3, C3, 'SE2' ) ) THEN * * ---------------------------------- * SEP: Symmetric Eigenvalue Problem @@ -1859,6 +1861,17 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL CCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ELSE CALL CCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), @@ -1868,16 +1881,26 @@ $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, $ RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'CCHKST', INFO END IF IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL CDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL CDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) + $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'CDRVST', INFO END IF @@ -1910,12 +1933,18 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN - CALL CDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, - $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, - $ INFO ) +* CALL CDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, +* $ INFO ) + CALL CDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'CDRVSG', INFO END IF @@ -2278,10 +2307,15 @@ CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) IF( TSTERR ) $ CALL CERRST( 'CHB', NOUT ) - CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, - $ INFO ) +* CALL CCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, +* $ INFO ) + CALL CCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, + $ INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'CCHKHB', INFO * diff --git a/TESTING/EIG/cchkhb2stg.f b/TESTING/EIG/cchkhb2stg.f new file mode 100644 index 00000000..d4aba4b9 --- /dev/null +++ b/TESTING/EIG/cchkhb2stg.f @@ -0,0 +1,880 @@ +*> \brief \b CCHKHBSTG +* +* @generated from zchkhb2stg.f, fortran z -> c, Sun Nov 6 00:22:35 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CCHKHBSTG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, +* ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, +* D2, D3, U, LDU, WORK, LWORK, RWORK RESULT, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, +* $ NWDTHS +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), KK( * ), NN( * ) +* REAL RESULT( * ), RWORK( * ), SD( * ), SE( * ) +* COMPLEX A( LDA, * ), U( LDU, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CCHKHBSTG tests the reduction of a Hermitian band matrix to tridiagonal +*> from, used with the Hermitian eigenvalue problem. +*> +*> CHBTRD factors a Hermitian band matrix A as U S U* , where * means +*> conjugate transpose, S is symmetric tridiagonal, and U is unitary. +*> CHBTRD can use either just the lower or just the upper triangle +*> of A; CCHKHBSTG checks both cases. +*> +*> CHETRD_HB2ST factors a Hermitian band matrix A as U S U* , +*> where * means conjugate transpose, S is symmetric tridiagonal, and U is +*> unitary. CHETRD_HB2ST can use either just the lower or just +*> the upper triangle of A; CCHKHBSTG checks both cases. +*> +*> DSTEQR factors S as Z D1 Z'. +*> D1 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSBTRD "U" (used as reference for DSYTRD_SB2ST) +*> D2 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "U". +*> D3 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "L". +*> +*> When CCHKHBSTG is called, a number of matrix "sizes" ("n's"), a number +*> of bandwidths ("k's"), and a number of matrix "types" are +*> specified. For each size ("n"), each bandwidth ("k") less than or +*> equal to "n", and each type of matrix, one matrix will be generated +*> and used to test the hermitian banded reduction routine. For each +*> matrix, a number of tests will be performed: +*> +*> (1) | A - V S V* | / ( |A| n ulp ) computed by CHBTRD with +*> UPLO='U' +*> +*> (2) | I - UU* | / ( n ulp ) +*> +*> (3) | A - V S V* | / ( |A| n ulp ) computed by CHBTRD with +*> UPLO='L' +*> +*> (4) | I - UU* | / ( n ulp ) +*> +*> (5) | D1 - D2 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D2 is computed by +*> CHETRD_HB2ST with UPLO='U' +*> +*> (6) | D1 - D3 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D3 is computed by +*> CHETRD_HB2ST with UPLO='L' +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> CCHKHBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NWDTHS +*> \verbatim +*> NWDTHS is INTEGER +*> The number of bandwidths to use. If it is zero, +*> CCHKHBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] KK +*> \verbatim +*> KK is INTEGER array, dimension (NWDTHS) +*> An array containing the bandwidths to be used for the band +*> matrices. The values must be at least zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, CCHKHBSTG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to CCHKHBSTG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA, max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least 2 (not 1!) +*> and at least max( KK )+1. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is REAL array, dimension (max(NN)) +*> Used to hold the diagonal of the tridiagonal matrix computed +*> by CHBTRD. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is REAL array, dimension (max(NN)) +*> Used to hold the off-diagonal of the tridiagonal matrix +*> computed by CHBTRD. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU, max(NN)) +*> Used to hold the unitary matrix computed by CHBTRD. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> max( LDA+1, max(NN)+1 )*max(NN). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (4) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, + $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, + $ D2, D3, U, LDU, WORK, LWORK, RWORK, RESULT, + $ INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, + $ NWDTHS + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), KK( * ), NN( * ) + REAL RESULT( * ), RWORK( * ), SD( * ), SE( * ), + $ D1( * ), D2( * ), D3( * ) + COMPLEX A( LDA, * ), U( LDU, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ TEN = 10.0E+0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 15 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, BADNNB + INTEGER I, IINFO, IMODE, ITYPE, J, JC, JCOL, JR, JSIZE, + $ JTYPE, JWIDTH, K, KMAX, MTYPES, N, NERRS, + $ NMATS, NMAX, NTEST, NTESTT + REAL ANINV, ANORM, COND, OVFL, RTOVFL, RTUNFL, + $ TEMP1, TEMP2, TEMP3, TEMP4, ULP, ULPINV, UNFL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), KMAGN( MAXTYP ), + $ KMODE( MAXTYP ), KTYPE( MAXTYP ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLASUM, XERBLA, CHBT21, CHBTRD, CLACPY, CLASET, + $ CLATMR, CLATMS, CHBTRD_HB2ST, CSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, CONJG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0 / +* .. +* .. Executable Statements .. +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + BADNNB = .FALSE. + KMAX = 0 + DO 20 J = 1, NSIZES + KMAX = MAX( KMAX, KK( J ) ) + IF( KK( J ).LT.0 ) + $ BADNNB = .TRUE. + 20 CONTINUE + KMAX = MIN( NMAX-1, KMAX ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NWDTHS.LT.0 ) THEN + INFO = -3 + ELSE IF( BADNNB ) THEN + INFO = -4 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.KMAX+1 ) THEN + INFO = -11 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -15 + ELSE IF( ( MAX( LDA, NMAX )+1 )*NMAX.GT.LWORK ) THEN + INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CCHKHBSTG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 .OR. NWDTHS.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 190 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / REAL( MAX( 1, N ) ) +* + DO 180 JWIDTH = 1, NWDTHS + K = KK( JWIDTH ) + IF( K.GT.N ) + $ GO TO 180 + K = MAX( 0, MIN( N-1, K ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 170 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 170 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A". +* Store as "Upper"; later, we will copy to other format. +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( K+1, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, + $ COND, ANORM, 0, 0, 'Q', A( K+1, 1 ), LDA, + $ WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, + $ COND, ANORM, K, K, 'Q', A, LDA, WORK, + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, + $ CONE, 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'Q', A( K+1, 1 ), LDA, + $ IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, + $ CONE, 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, K, K, + $ ZERO, ANORM, 'Q', A, LDA, IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL CLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, + $ COND, ANORM, K, K, 'Q', A, LDA, + $ WORK( N+1 ), IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + IF( N.GT.1 ) + $ K = MAX( 1, K ) + CALL CLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, + $ COND, ANORM, 1, 1, 'Q', A( K, 1 ), LDA, + $ WORK, IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( K, I ) ) / + $ SQRT( ABS( A( K+1, I-1 )*A( K+1, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( K, I ) = HALF*SQRT( ABS( A( K+1, + $ I-1 )*A( K+1, I ) ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call CHBTRD to compute S and U from upper triangle. +* + CALL CLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 1 + CALL CHBTRD( 'V', 'U', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBTRD(U)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL CHBT21( 'Upper', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RWORK, RESULT( 1 ) ) +* +* Before converting A into lower for DSBTRD, run DSYTRD_SB2ST +* otherwise matrix A will be converted to lower and then need +* to be converted back to upper in order to run the upper case +* ofDSYTRD_SB2ST +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the DSBTRD and used as reference to compare +* with the DSYTRD_SB2ST routine +* +* Compute D1 from the DSBTRD and used as reference for the +* DSYTRD_SB2ST +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL CSTEQR( 'N', N, D1, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* DSYTRD_SB2ST Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL CHETRD_HB2ST( 'N', 'N', "U", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the DSYTRD_SB2ST Upper case +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL CSTEQR( 'N', N, D2, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Convert A from Upper-Triangle-Only storage to +* Lower-Triangle-Only storage. +* + DO 120 JC = 1, N + DO 110 JR = 0, MIN( K, N-JC ) + A( JR+1, JC ) = CONJG( A( K+1-JR, JC+JR ) ) + 110 CONTINUE + 120 CONTINUE + DO 140 JC = N + 1 - K, N + DO 130 JR = MIN( K, N-JC ) + 1, K + A( JR+1, JC ) = ZERO + 130 CONTINUE + 140 CONTINUE +* +* Call CHBTRD to compute S and U from lower triangle +* + CALL CLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 3 + CALL CHBTRD( 'V', 'L', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBTRD(L)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 150 + END IF + END IF + NTEST = 4 +* +* Do tests 3 and 4 +* + CALL CHBT21( 'Lower', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RWORK, RESULT( 3 ) ) +* +* DSYTRD_SB2ST Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL CLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL CHETRD_HB2ST( 'N', 'N', "L", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL CSTEQR( 'N', N, D3, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 150 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 6 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT(5) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT(6) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* End of Loop -- Check for RESULT(j) > THRESH +* + 150 CONTINUE + NTESTT = NTESTT + NTEST +* +* Print out tests which fail. +* + DO 160 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CHB' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Hermitian' + WRITE( NOUNIT, FMT = 9994 )'unitary', '*', + $ 'conjugate transpose', ( '*', J = 1, 6 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9993 )N, K, IOLDSD, JTYPE, + $ JR, RESULT( JR ) + END IF + 160 CONTINUE +* + 170 CONTINUE + 180 CONTINUE + 190 CONTINUE +* +* Summary +* + CALL SLASUM( 'CHB', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' CCHKHBSTG: ', A, ' returned INFO=', I6, '.', / 9X, 'N=', + $ I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + 9998 FORMAT( / 1X, A3, + $ ' -- Complex Hermitian Banded Tridiagonal Reduction Routines' + $ ) + 9997 FORMAT( ' Matrix types (see SCHK23 for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Banded Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) +* + 9994 FORMAT( / ' Tests performed: (S is Tridiag, U is ', A, ',', + $ / 20X, A, ' means ', A, '.', / ' UPLO=''U'':', + $ / ' 1= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 2= | I - U U', A1, ' | / ( n ulp )', / ' UPLO=''L'':', + $ / ' 3= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 4= | I - U U', A1, ' | / ( n ulp )' / ' Eig check:', + $ /' 5= | D1 - D2', '', ' | / ( |D1| ulp ) ', + $ ' 6= | D1 - D3', '', ' | / ( |D1| ulp ) ' ) + 9993 FORMAT( ' N=', I5, ', K=', I4, ', seed=', 4( I4, ',' ), ' type ', + $ I2, ', test(', I2, ')=', G10.3 ) +* +* End of CCHKHBSTG +* + END diff --git a/TESTING/EIG/cchkst2stg.f b/TESTING/EIG/cchkst2stg.f new file mode 100644 index 00000000..84bf432a --- /dev/null +++ b/TESTING/EIG/cchkst2stg.f @@ -0,0 +1,2145 @@ +*> \brief \b CCHKST2STG +* +* @generated from zchkst2stg.f, fortran z -> c, Fri Nov 4 15:45:07 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, +* WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, RESULT, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, +* $ NSIZES, NTYPES +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL D1( * ), D2( * ), D3( * ), D4( * ), D5( * ), +* $ RESULT( * ), RWORK( * ), SD( * ), SE( * ), +* $ WA1( * ), WA2( * ), WA3( * ), WR( * ) +* COMPLEX A( LDA, * ), AP( * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), VP( * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CCHKST2STG checks the Hermitian eigenvalue problem routines +*> using the 2-stage reduction techniques. Since the generation +*> of Q or the vectors is not available in this release, we only +*> compare the eigenvalue resulting when using the 2-stage to the +*> one considered as reference using the standard 1-stage reduction +*> CHETRD. For that, we call the standard CHETRD and compute D1 using +*> DSTEQR, then we call the 2-stage CHETRD_2STAGE with Upper and Lower +*> and we compute D2 and D3 using DSTEQR and then we replaced tests +*> 3 and 4 by tests 11 and 12. test 1 and 2 remain to verify that +*> the 1-stage results are OK and can be trusted. +*> This testing routine will converge to the CCHKST in the next +*> release when vectors and generation of Q will be implemented. +*> +*> CHETRD factors A as U S U* , where * means conjugate transpose, +*> S is real symmetric tridiagonal, and U is unitary. +*> CHETRD can use either just the lower or just the upper triangle +*> of A; CCHKST2STG checks both cases. +*> U is represented as a product of Householder +*> transformations, whose vectors are stored in the first +*> n-1 columns of V, and whose scale factors are in TAU. +*> +*> CHPTRD does the same as CHETRD, except that A and V are stored +*> in "packed" format. +*> +*> CUNGTR constructs the matrix U from the contents of V and TAU. +*> +*> CUPGTR constructs the matrix U from the contents of VP and TAU. +*> +*> CSTEQR factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal. D2 is the matrix of +*> eigenvalues computed when Z is not computed. +*> +*> SSTERF computes D3, the matrix of eigenvalues, by the +*> PWK method, which does not yield eigenvectors. +*> +*> CPTEQR factors S as Z4 D4 Z4* , for a +*> Hermitian positive definite tridiagonal matrix. +*> D5 is the matrix of eigenvalues computed when Z is not +*> computed. +*> +*> SSTEBZ computes selected eigenvalues. WA1, WA2, and +*> WA3 will denote eigenvalues computed to high +*> absolute accuracy, with different range options. +*> WR will denote eigenvalues computed to high relative +*> accuracy. +*> +*> CSTEIN computes Y, the eigenvectors of S, given the +*> eigenvalues. +*> +*> CSTEDC factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). It may also +*> update an input unitary matrix, usually the output +*> from CHETRD/CUNGTR or CHPTRD/CUPGTR ('V' option). It may +*> also just compute eigenvalues ('N' option). +*> +*> CSTEMR factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). CSTEMR +*> uses the Relatively Robust Representation whenever possible. +*> +*> When CCHKST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the Hermitian eigenroutines. For each matrix, a number +*> of tests will be performed: +*> +*> (1) | A - V S V* | / ( |A| n ulp ) CHETRD( UPLO='U', ... ) +*> +*> (2) | I - UV* | / ( n ulp ) CUNGTR( UPLO='U', ... ) +*> +*> (3) | A - V S V* | / ( |A| n ulp ) CHETRD( UPLO='L', ... ) +*> replaced by | D1 - D2 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D2 is the +*> eigenvalue matrix computed using S_2stage the output of +*> CHETRD_2STAGE("N", "U",....). D1 and D2 are computed +*> via DSTEQR('N',...) +*> +*> (4) | I - UV* | / ( n ulp ) CUNGTR( UPLO='L', ... ) +*> replaced by | D1 - D3 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D3 is the +*> eigenvalue matrix computed using S_2stage the output of +*> CHETRD_2STAGE("N", "L",....). D1 and D3 are computed +*> via DSTEQR('N',...) +*> +*> (5-8) Same as 1-4, but for CHPTRD and CUPGTR. +*> +*> (9) | S - Z D Z* | / ( |S| n ulp ) CSTEQR('V',...) +*> +*> (10) | I - ZZ* | / ( n ulp ) CSTEQR('V',...) +*> +*> (11) | D1 - D2 | / ( |D1| ulp ) CSTEQR('N',...) +*> +*> (12) | D1 - D3 | / ( |D1| ulp ) SSTERF +*> +*> (13) 0 if the true eigenvalues (computed by sturm count) +*> of S are within THRESH of +*> those in D1. 2*THRESH if they are not. (Tested using +*> SSTECH) +*> +*> For S positive definite, +*> +*> (14) | S - Z4 D4 Z4* | / ( |S| n ulp ) CPTEQR('V',...) +*> +*> (15) | I - Z4 Z4* | / ( n ulp ) CPTEQR('V',...) +*> +*> (16) | D4 - D5 | / ( 100 |D4| ulp ) CPTEQR('N',...) +*> +*> When S is also diagonally dominant by the factor gamma < 1, +*> +*> (17) max | D4(i) - WR(i) | / ( |D4(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> SSTEBZ( 'A', 'E', ...) +*> +*> (18) | WA1 - D3 | / ( |D3| ulp ) SSTEBZ( 'A', 'E', ...) +*> +*> (19) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> SSTEBZ( 'I', 'E', ...) +*> +*> (20) | S - Y WA1 Y* | / ( |S| n ulp ) SSTEBZ, CSTEIN +*> +*> (21) | I - Y Y* | / ( n ulp ) SSTEBZ, CSTEIN +*> +*> (22) | S - Z D Z* | / ( |S| n ulp ) CSTEDC('I') +*> +*> (23) | I - ZZ* | / ( n ulp ) CSTEDC('I') +*> +*> (24) | S - Z D Z* | / ( |S| n ulp ) CSTEDC('V') +*> +*> (25) | I - ZZ* | / ( n ulp ) CSTEDC('V') +*> +*> (26) | D1 - D2 | / ( |D1| ulp ) CSTEDC('V') and +*> CSTEDC('N') +*> +*> Test 27 is disabled at the moment because CSTEMR does not +*> guarantee high relatvie accuracy. +*> +*> (27) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> CSTEMR('V', 'A') +*> +*> (28) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> CSTEMR('V', 'I') +*> +*> Tests 29 through 34 are disable at present because CSTEMR +*> does not handle partial specturm requests. +*> +*> (29) | S - Z D Z* | / ( |S| n ulp ) CSTEMR('V', 'I') +*> +*> (30) | I - ZZ* | / ( n ulp ) CSTEMR('V', 'I') +*> +*> (31) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> CSTEMR('N', 'I') vs. CSTEMR('V', 'I') +*> +*> (32) | S - Z D Z* | / ( |S| n ulp ) CSTEMR('V', 'V') +*> +*> (33) | I - ZZ* | / ( n ulp ) CSTEMR('V', 'V') +*> +*> (34) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> CSTEMR('N', 'V') vs. CSTEMR('V', 'V') +*> +*> (35) | S - Z D Z* | / ( |S| n ulp ) CSTEMR('V', 'A') +*> +*> (36) | I - ZZ* | / ( n ulp ) CSTEMR('V', 'A') +*> +*> (37) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> CSTEMR('N', 'A') vs. CSTEMR('V', 'A') +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) Same as (8), but diagonal elements are all positive. +*> (17) Same as (9), but diagonal elements are all positive. +*> (18) Same as (10), but diagonal elements are all positive. +*> (19) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (20) Same as (16), but multiplied by SQRT( underflow threshold ) +*> (21) A diagonally dominant tridiagonal matrix with geometrically +*> spaced diagonal entries 1, ..., ULP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> CCHKST2STG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, CCHKST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to CCHKST2STG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array of +*> dimension ( LDA , max(NN) ) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix A stored in packed format. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is REAL array of +*> dimension( max(NN) ) +*> The diagonal of the tridiagonal matrix computed by CHETRD. +*> On exit, SD and SE contain the tridiagonal form of the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is REAL array of +*> dimension( max(NN) ) +*> The off-diagonal of the tridiagonal matrix computed by +*> CHETRD. On exit, SD and SE contain the tridiagonal form of +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D1 +*> \verbatim +*> D1 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by CSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] D2 +*> \verbatim +*> D2 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by CSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D3 +*> \verbatim +*> D3 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D4 +*> \verbatim +*> D4 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by CPTEQR(V). +*> CPTEQR factors S as Z4 D4 Z4* +*> On exit, the eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D5 +*> \verbatim +*> D5 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by CPTEQR(N) +*> when Z is not computed. On exit, the +*> eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] WA1 +*> \verbatim +*> WA1 is REAL array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> \endverbatim +*> +*> \param[out] WA2 +*> \verbatim +*> WA2 is REAL array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> Choose random values for IL and IU, and ask for the +*> IL-th through IU-th eigenvalues. +*> \endverbatim +*> +*> \param[out] WA3 +*> \verbatim +*> WA3 is REAL array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> Determine the values VL and VU of the IL-th and IU-th +*> eigenvalues and ask for all eigenvalues in this range. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different options. +*> as computed by SSTEBZ. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array of +*> dimension( LDU, max(NN) ). +*> The unitary matrix computed by CHETRD + CUNGTR. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U, Z, and V. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array of +*> dimension( LDU, max(NN) ). +*> The Housholder vectors computed by CHETRD in reducing A to +*> tridiagonal form. The vectors computed with UPLO='U' are +*> in the upper triangle, and the vectors computed with UPLO='L' +*> are in the lower triangle. (As described in CHETRD, the +*> sub- and superdiagonal are not set to 1, although the +*> true Householder vector has a 1 in that position. The +*> routines that use V, such as CUNGTR, set those entries to +*> 1 before using them, and then restore them later.) +*> \endverbatim +*> +*> \param[out] VP +*> \verbatim +*> VP is COMPLEX array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix V stored in packed format. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array of +*> dimension( max(NN) ) +*> The Householder factors computed by CHETRD in reducing A +*> to tridiagonal form. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array of +*> dimension( LDU, max(NN) ). +*> The unitary matrix of eigenvectors computed by CSTEQR, +*> CPTEQR, and CSTEIN. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array of +*> dimension( LWORK ) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 3 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> Workspace. +*> \endverbatim +*> +*> \param[out] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The number of entries in IWORK. This must be at least +*> 6 + 6*Nmax + 5 * Nmax * lg Nmax +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The number of entries in LRWORK (dimension( ??? ) +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (26) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -23: LDU < 1 or LDU < NMAX. +*> -29: LWORK too small. +*> If CLATMR, CLATMS, CHETRD, CUNGTR, CSTEQR, SSTERF, +*> or CUNMC2 returns an error code, the +*> absolute value of it is returned. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NBLOCK Blocksize as returned by ENVIR. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, + $ WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, RESULT, + $ INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, + $ NSIZES, NTYPES + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL D1( * ), D2( * ), D3( * ), D4( * ), D5( * ), + $ RESULT( * ), RWORK( * ), SD( * ), SE( * ), + $ WA1( * ), WA2( * ), WA3( * ), WR( * ) + COMPLEX A( LDA, * ), AP( * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), VP( * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT, TEN, HUN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ EIGHT = 8.0E0, TEN = 10.0E0, HUN = 100.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) + LOGICAL CRANGE + PARAMETER ( CRANGE = .FALSE. ) + LOGICAL CREL + PARAMETER ( CREL = .FALSE. ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, TRYRAC + INTEGER I, IINFO, IL, IMODE, INDE, INDRWK, ITEMP, + $ ITYPE, IU, J, JC, JR, JSIZE, JTYPE, LGN, + $ LIWEDC, LOG2UI, LRWEDC, LWEDC, M, M2, M3, + $ MTYPES, N, NAP, NBLOCK, NERRS, NMATS, NMAX, + $ NSPLIT, NTEST, NTESTT, LH, LW + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, TEMP4, ULP, + $ ULPINV, UNFL, VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) + REAL DUMMA( 1 ) +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLARND, SSXT1 + EXTERNAL ILAENV, SLAMCH, SLARND, SSXT1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLABAD, SLASUM, SSTEBZ, SSTECH, SSTERF, + $ XERBLA, CCOPY, CHET21, CHETRD, CHPT21, CHPTRD, + $ CLACPY, CLASET, CLATMR, CLATMS, CPTEQR, CSTEDC, + $ CSTEMR, CSTEIN, CSTEQR, CSTT21, CSTT22, CUNGTR, + $ CUPGTR, CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, CONJG, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, + $ 8, 8, 9, 9, 9, 9, 9, 10 / + DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 1, 1, 2, 3, 1 / + DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 3, 1, 4, 4, 3 / +* .. +* .. Executable Statements .. +* +* Keep ftnchek happy + IDUMMA( 1 ) = 1 +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + TRYRAC = .TRUE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + NBLOCK = ILAENV( 1, 'CHETRD', 'L', NMAX, -1, -1, -1 ) + NBLOCK = MIN( NMAX, MAX( 1, NBLOCK ) ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -23 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -29 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CCHKST2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + LOG2UI = INT( LOG( ULPINV ) / LOG( TWO ) ) + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE + NERRS = 0 + NMATS = 0 +* + DO 310 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 + LRWEDC = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWEDC = 6 + 6*N + 5*N*LGN + ELSE + LWEDC = 8 + LRWEDC = 7 + LIWEDC = 12 + END IF + NAP = ( N*( N+1 ) ) / 2 + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 300 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 300 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log Hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random Hermitian +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JC = 1, N + A( JC, JC ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL CLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + CALL CLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, COND, + $ ANORM, 1, 1, 'N', A, LDA, WORK, IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( I-1, I ) ) + TEMP2 = SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) + IF( TEMP1.GT.HALF*TEMP2 ) THEN + A( I-1, I ) = A( I-1, I )* + $ ( HALF*TEMP2 / ( UNFL+TEMP1 ) ) + A( I, I-1 ) = CONJG( A( I-1, I ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call CHETRD and CUNGTR to compute S and U from +* upper triangle. +* + CALL CLACPY( 'U', N, N, A, LDA, V, LDU ) +* + NTEST = 1 + CALL CHETRD( 'U', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHETRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL CLACPY( 'U', N, N, V, LDU, U, LDU ) +* + NTEST = 2 + CALL CUNGTR( 'U', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CUNGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 2 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL CHET21( 2, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 1 ) ) + CALL CHET21( 3, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 2 ) ) +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the standard 1-stage algorithm and use it as a +* reference to compare with the 2-stage technique +* +* Compute D1 from the 1-stage and used as reference for the +* 2-stage +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL CSTEQR( 'N', N, D1, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL CLACPY( 'U', N, N, A, LDA, V, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL CHETRD_2STAGE( 'N', "U", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 3 + CALL CSTEQR( 'N', N, D2, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL CLACPY( 'L', N, N, A, LDA, V, LDU ) + CALL CHETRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 4 + CALL CSTEQR( 'N', N, D3, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 4 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT( 3 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 4 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Skip the DSYTRD for lower that since we replaced its testing +* 3 and 4 by the 2-stage one. + GOTO 101 +* +* Call CHETRD and CUNGTR to compute S and U from +* lower triangle, do tests. +* + CALL CLACPY( 'L', N, N, A, LDA, V, LDU ) +* + NTEST = 3 + CALL CHETRD( 'L', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHETRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL CLACPY( 'L', N, N, V, LDU, U, LDU ) +* + NTEST = 4 + CALL CUNGTR( 'L', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CUNGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL CHET21( 2, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 3 ) ) + CALL CHET21( 3, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 4 ) ) +* +*after skipping old tests 3 4 back to the normal +* + 101 CONTINUE +* +* Store the upper triangle of A in AP +* + I = 0 + DO 120 JC = 1, N + DO 110 JR = 1, JC + I = I + 1 + AP( I ) = A( JR, JC ) + 110 CONTINUE + 120 CONTINUE +* +* Call CHPTRD and CUPGTR to compute S and U from AP +* + CALL CCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 5 + CALL CHPTRD( 'U', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 6 + CALL CUPGTR( 'U', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CUPGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 5 and 6 +* + CALL CHPT21( 2, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 5 ) ) + CALL CHPT21( 3, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 6 ) ) +* +* Store the lower triangle of A in AP +* + I = 0 + DO 140 JC = 1, N + DO 130 JR = JC, N + I = I + 1 + AP( I ) = A( JR, JC ) + 130 CONTINUE + 140 CONTINUE +* +* Call CHPTRD and CUPGTR to compute S and U from AP +* + CALL CCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 7 + CALL CHPTRD( 'L', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 8 + CALL CUPGTR( 'L', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CUPGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 8 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL CHPT21( 2, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 7 ) ) + CALL CHPT21( 3, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 8 ) ) +* +* Call CSTEQR to compute D1, D2, and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 9 + CALL CSTEQR( 'V', N, D1, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 11 + CALL CSTEQR( 'N', N, D2, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 11 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D3 (using PWK method) +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 12 + CALL SSTERF( N, D3, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTERF', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 9 and 10 +* + CALL CSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 9 ) ) +* +* Do Tests 11 and 12 +* + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 150 CONTINUE +* + RESULT( 11 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 12 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Do Test 13 -- Sturm Sequence Test of Eigenvalues +* Go up by factors of two until it succeeds +* + NTEST = 13 + TEMP1 = THRESH*( HALF-ULP ) +* + DO 160 J = 0, LOG2UI + CALL SSTECH( N, SD, SE, D1, TEMP1, RWORK, IINFO ) + IF( IINFO.EQ.0 ) + $ GO TO 170 + TEMP1 = TEMP1*TWO + 160 CONTINUE +* + 170 CONTINUE + RESULT( 13 ) = TEMP1 +* +* For positive definite matrices ( JTYPE.GT.15 ) call CPTEQR +* and do tests 14, 15, and 16 . +* + IF( JTYPE.GT.15 ) THEN +* +* Compute D4 and Z4 +* + CALL SCOPY( N, SD, 1, D4, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 14 + CALL CPTEQR( 'V', N, D4, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CPTEQR(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 14 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 14 and 15 +* + CALL CSTT21( N, 0, SD, SE, D4, DUMMA, Z, LDU, WORK, + $ RWORK, RESULT( 14 ) ) +* +* Compute D5 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 16 + CALL CPTEQR( 'N', N, D5, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CPTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 16 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 180 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J ) ), ABS( D5( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D4( J )-D5( J ) ) ) + 180 CONTINUE +* + RESULT( 16 ) = TEMP2 / MAX( UNFL, + $ HUN*ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 14 ) = ZERO + RESULT( 15 ) = ZERO + RESULT( 16 ) = ZERO + END IF +* +* Call SSTEBZ with different options and do tests 17-18. +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 ) THEN + NTEST = 17 + ABSTOL = UNFL + UNFL + CALL SSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M, NSPLIT, WR, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A,rel)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 17 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 17 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 190 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 190 CONTINUE +* + RESULT( 17 ) = TEMP1 / TEMP2 + ELSE + RESULT( 17 ) = ZERO + END IF +* +* Now ask for all eigenvalues with high absolute accuracy. +* + NTEST = 18 + ABSTOL = UNFL + UNFL + CALL SSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), RWORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 18 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 200 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D3( J ) ), ABS( WA1( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D3( J )-WA1( J ) ) ) + 200 CONTINUE +* + RESULT( 18 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Choose random values for IL and IU, and ask for the +* IL-th through IU-th eigenvalues. +* + NTEST = 19 + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + END IF +* + CALL SSTEBZ( 'I', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M2, NSPLIT, WA2, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Determine the values VL and VU of the IL-th and IU-th +* eigenvalues and ask for all eigenvalues in this range. +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF*( WA1( IL )-WA1( IL-1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF*( WA1( IU+1 )-WA1( IU ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL SSTEBZ( 'V', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M3, NSPLIT, WA3, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.NE.0 ) THEN + RESULT( 19 ) = ULPINV + GO TO 280 + END IF +* +* Do test 19 +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( N ) ), ABS( WA1( 1 ) ) ) + ELSE + TEMP3 = ZERO + END IF +* + RESULT( 19 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* +* Call CSTEIN to compute eigenvectors corresponding to +* eigenvalues in WA1. (First call SSTEBZ again, to make sure +* it returns these eigenvalues in the correct order.) +* + NTEST = 21 + CALL SSTEBZ( 'A', 'B', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), RWORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL CSTEIN( N, SD, SE, M, WA1, IWORK( 1 ), IWORK( N+1 ), Z, + $ LDU, RWORK, IWORK( 2*N+1 ), IWORK( 3*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEIN', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 20 and 21 +* + CALL CSTT21( N, 0, SD, SE, WA1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 20 ) ) +* +* Call CSTEDC(I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + INDE = 1 + INDRWK = INDE + N + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 22 + CALL CSTEDC( 'I', N, D1, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEDC(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 22 and 23 +* + CALL CSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 22 ) ) +* +* Call CSTEDC(V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 24 + CALL CSTEDC( 'V', N, D1, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEDC(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 24 and 25 +* + CALL CSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 24 ) ) +* +* Call CSTEDC(N) to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 26 + CALL CSTEDC( 'N', N, D2, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEDC(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 26 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 26 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 210 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 210 CONTINUE +* + RESULT( 26 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Only test CSTEMR if IEEE compliant +* + IF( ILAENV( 10, 'CSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 .AND. + $ ILAENV( 11, 'CSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 ) THEN +* +* Call CSTEMR, do test 27 (relative eigenvalue accuracy) +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 .AND. CREL ) THEN + NTEST = 27 + ABSTOL = UNFL + UNFL + CALL CSTEMR( 'V', 'A', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(V,A,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 27 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 27 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 220 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 220 CONTINUE +* + RESULT( 27 ) = TEMP1 / TEMP2 +* + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF +* + IF( CRANGE ) THEN + NTEST = 28 + ABSTOL = UNFL + UNFL + CALL CSTEMR( 'V', 'I', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK, LRWORK, IWORK( 2*N+1 ), + $ LWORK-2*N, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(V,I,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 28 ) = ULPINV + GO TO 270 + END IF + END IF +* +* +* Do test 28 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP* + $ ( ONE+EIGHT*HALF**2 ) / ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 230 J = IL, IU + TEMP1 = MAX( TEMP1, ABS( WR( J-IL+1 )-D4( N-J+ + $ 1 ) ) / ( ABSTOL+ABS( WR( J-IL+1 ) ) ) ) + 230 CONTINUE +* + RESULT( 28 ) = TEMP1 / TEMP2 + ELSE + RESULT( 28 ) = ZERO + END IF + ELSE + RESULT( 27 ) = ZERO + RESULT( 28 ) = ZERO + END IF +* +* Call CSTEMR(V,I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + IF( CRANGE ) THEN + NTEST = 29 + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + CALL CSTEMR( 'V', 'I', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(V,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 29 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 29 and 30 +* +* +* Call CSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 31 + CALL CSTEMR( 'N', 'I', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(N,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 31 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 31 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 240 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 240 CONTINUE +* + RESULT( 31 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* +* Call CSTEMR(V,V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) + CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 32 +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = D2( IL ) - MAX( HALF* + $ ( D2( IL )-D2( IL-1 ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VL = D2( 1 ) - MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D2( IU ) + MAX( HALF* + $ ( D2( IU+1 )-D2( IU ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VU = D2( N ) + MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL CSTEMR( 'V', 'V', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, M, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(V,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 32 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 32 and 33 +* + CALL CSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RWORK, RESULT( 32 ) ) +* +* Call CSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 34 + CALL CSTEMR( 'N', 'V', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(N,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 34 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 250 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 250 CONTINUE +* + RESULT( 34 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 29 ) = ZERO + RESULT( 30 ) = ZERO + RESULT( 31 ) = ZERO + RESULT( 32 ) = ZERO + RESULT( 33 ) = ZERO + RESULT( 34 ) = ZERO + END IF +* +* +* Call CSTEMR(V,A) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 35 +* + CALL CSTEMR( 'V', 'A', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 35 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 35 and 36 +* + CALL CSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, M, + $ RWORK, RESULT( 35 ) ) +* +* Call CSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 37 + CALL CSTEMR( 'N', 'A', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CSTEMR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 37 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 260 CONTINUE +* + RESULT( 37 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + END IF + 270 CONTINUE + 280 CONTINUE + NTESTT = NTESTT + NTEST +* +* End of Loop -- Check for RESULT(j) > THRESH +* +* +* Print out tests which fail. +* + DO 290 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CST' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Hermitian' + WRITE( NOUNIT, FMT = 9994 ) +* +* Tests performed +* + WRITE( NOUNIT, FMT = 9987 ) + END IF + NERRS = NERRS + 1 + IF( RESULT( JR ).LT.10000.0E0 ) THEN + WRITE( NOUNIT, FMT = 9989 )N, JTYPE, IOLDSD, JR, + $ RESULT( JR ) + ELSE + WRITE( NOUNIT, FMT = 9988 )N, JTYPE, IOLDSD, JR, + $ RESULT( JR ) + END IF + END IF + 290 CONTINUE + 300 CONTINUE + 310 CONTINUE +* +* Summary +* + CALL SLASUM( 'CST', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' CCHKST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, ' -- Complex Hermitian eigenvalue problem' ) + 9997 FORMAT( ' Matrix types (see CCHKST2STG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) + 9994 FORMAT( ' 16=Positive definite, evenly spaced eigenvalues', + $ / ' 17=Positive definite, geometrically spaced eigenvlaues', + $ / ' 18=Positive definite, clustered eigenvalues', + $ / ' 19=Positive definite, small evenly spaced eigenvalues', + $ / ' 20=Positive definite, large evenly spaced eigenvalues', + $ / ' 21=Diagonally dominant tridiagonal, geometrically', + $ ' spaced eigenvalues' ) +* + 9989 FORMAT( ' Matrix order=', I5, ', type=', I2, ', seed=', + $ 4( I4, ',' ), ' result ', I3, ' is', 0P, F8.2 ) + 9988 FORMAT( ' Matrix order=', I5, ', type=', I2, ', seed=', + $ 4( I4, ',' ), ' result ', I3, ' is', 1P, E10.3 ) +* + 9987 FORMAT( / 'Test performed: see CCHKST2STG for details.', / ) +* End of CCHKST2STG +* + END diff --git a/TESTING/EIG/cdrvsg2stg.f b/TESTING/EIG/cdrvsg2stg.f new file mode 100644 index 00000000..3a624568 --- /dev/null +++ b/TESTING/EIG/cdrvsg2stg.f @@ -0,0 +1,1384 @@ +*> \brief \b CDRVSG2STG +* +* @generated from zdrvsg2stg.f, fortran z -> c, Sun Nov 6 14:01:09 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, +* BB, AP, BP, WORK, NWORK, RWORK, LRWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDZ, LIWORK, LRWORK, NOUNIT, +* $ NSIZES, NTYPES, NWORK +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL D( * ), RESULT( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AB( LDA, * ), AP( * ), +* $ B( LDB, * ), BB( LDB, * ), BP( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CDRVSG2STG checks the complex Hermitian generalized eigenproblem +*> drivers. +*> +*> CHEGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem. +*> +*> CHEGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem using a divide and conquer algorithm. +*> +*> CHEGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem. +*> +*> CHPGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage. +*> +*> CHPGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage using a divide and +*> conquer algorithm. +*> +*> CHPGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage. +*> +*> CHBGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem. +*> +*> CHBGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem using a divide and conquer +*> algorithm. +*> +*> CHBGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem. +*> +*> When CDRVSG2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix A of the given type will be +*> generated; a random well-conditioned matrix B is also generated +*> and the pair (A,B) is used to test the drivers. +*> +*> For each pair (A,B), the following tests are performed: +*> +*> (1) CHEGV with ITYPE = 1 and UPLO ='U': +*> +*> | A Z - B Z D | / ( |A| |Z| n ulp ) +*> | D - D2 | / ( |D| ulp ) where D is computed by +*> CHEGV and D2 is computed by +*> CHEGV_2STAGE. This test is +*> only performed for DSYGV +*> +*> (2) as (1) but calling CHPGV +*> (3) as (1) but calling CHBGV +*> (4) as (1) but with UPLO = 'L' +*> (5) as (4) but calling CHPGV +*> (6) as (4) but calling CHBGV +*> +*> (7) CHEGV with ITYPE = 2 and UPLO ='U': +*> +*> | A B Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (8) as (7) but calling CHPGV +*> (9) as (7) but with UPLO = 'L' +*> (10) as (9) but calling CHPGV +*> +*> (11) CHEGV with ITYPE = 3 and UPLO ='U': +*> +*> | B A Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (12) as (11) but calling CHPGV +*> (13) as (11) but with UPLO = 'L' +*> (14) as (13) but calling CHPGV +*> +*> CHEGVD, CHPGVD and CHBGVD performed the same 14 tests. +*> +*> CHEGVX, CHPGVX and CHBGVX performed the above 14 tests with +*> the parameter RANGE = 'A', 'N' and 'I', respectively. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> This type is used for the matrix A which has half-bandwidth KA. +*> B is generated as a well-conditioned positive definite matrix +*> with half-bandwidth KB (<= KA). +*> Currently, the list of possible types for A is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> +*> (16) Same as (8), but with KA = 1 and KB = 1 +*> (17) Same as (8), but with KA = 2 and KB = 1 +*> (18) Same as (8), but with KA = 2 and KB = 2 +*> (19) Same as (8), but with KA = 3 and KB = 1 +*> (20) Same as (8), but with KA = 3 and KB = 2 +*> (21) Same as (8), but with KA = 3 and KB = 3 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> CDRVSG2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, CDRVSG2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to CDRVSG2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A COMPLEX array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> B COMPLEX array, dimension (LDB , max(NN)) +*> Used to hold the Hermitian positive definite matrix for +*> the generailzed problem. +*> On exit, B contains the last matrix actually +*> used. +*> Modified. +*> +*> LDB INTEGER +*> The leading dimension of B. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D REAL array, dimension (max(NN)) +*> The eigenvalues of A. On exit, the eigenvalues in D +*> correspond with the matrix in A. +*> Modified. +*> +*> Z COMPLEX array, dimension (LDZ, max(NN)) +*> The matrix of eigenvectors. +*> Modified. +*> +*> LDZ INTEGER +*> The leading dimension of ZZ. It must be at least 1 and +*> at least max( NN ). +*> Not modified. +*> +*> AB COMPLEX array, dimension (LDA, max(NN)) +*> Workspace. +*> Modified. +*> +*> BB COMPLEX array, dimension (LDB, max(NN)) +*> Workspace. +*> Modified. +*> +*> AP COMPLEX array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> BP COMPLEX array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> WORK COMPLEX array, dimension (NWORK) +*> Workspace. +*> Modified. +*> +*> NWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 2*N + N**2 where N = max( NN(j), 2 ). +*> Not modified. +*> +*> RWORK REAL array, dimension (LRWORK) +*> Workspace. +*> Modified. +*> +*> LRWORK INTEGER +*> The number of entries in RWORK. This must be at least +*> max( 7*N, 1 + 4*N + 2*N*lg(N) + 3*N**2 ) where +*> N = max( NN(j) ) and lg( N ) = smallest integer k such +*> that 2**k >= N . +*> Not modified. +*> +*> IWORK INTEGER array, dimension (LIWORK)) +*> Workspace. +*> Modified. +*> +*> LIWORK INTEGER +*> The number of entries in IWORK. This must be at least +*> 2 + 5*max( NN(j) ). +*> Not modified. +*> +*> RESULT REAL array, dimension (70) +*> The values computed by the 70 tests described above. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDZ < 1 or LDZ < NMAX. +*> -21: NWORK too small. +*> -23: LRWORK too small. +*> -25: LIWORK too small. +*> If CLATMR, CLATMS, CHEGV, CHPGV, CHBGV, CHEGVD, CHPGVD, +*> CHPGVD, CHEGVX, CHPGVX, CHBGVX returns an error code, +*> the absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests that have been run +*> on this matrix. +*> NTESTT The total number of tests for this call. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by SLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, + $ BB, AP, BP, WORK, NWORK, RWORK, LRWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDZ, LIWORK, LRWORK, NOUNIT, + $ NSIZES, NTYPES, NWORK + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL D( * ), D2( * ), RESULT( * ), RWORK( * ) + COMPLEX A( LDA, * ), AB( LDA, * ), AP( * ), + $ B( LDB, * ), BB( LDB, * ), BP( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 10.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IBTYPE, IBUPLO, IINFO, IJ, IL, IMODE, ITEMP, + $ ITYPE, IU, J, JCOL, JSIZE, JTYPE, KA, KA9, KB, + $ KB9, M, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, ULP, ULPINV, UNFL, VL, VU, TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLARND + EXTERNAL LSAME, SLAMCH, SLARND +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLAFTS, SLASUM, XERBLA, CHBGV, CHBGVD, + $ CHBGVX, CHEGV, CHEGVD, CHEGVX, CHPGV, CHPGVD, + $ CHPGVX, CLACPY, CLASET, CLATMR, CLATMS, CSGT01, + $ CHEGV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 6*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 6*1 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 6*4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 0 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LE.1 .OR. LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDZ.LE.1 .OR. LDZ.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.NWORK ) THEN + INFO = -21 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.LRWORK ) THEN + INFO = -23 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.LIWORK ) THEN + INFO = -25 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CDRVSG2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = SLAMCH( 'Overflow' ) + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 650 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + KA9 = 0 + KB9 = 0 + DO 640 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 640 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, w/ eigenvalues +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 banded, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 90 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* + IF( ITYPE.EQ.1 ) THEN +* +* Zero +* + KA = 0 + KB = 0 + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + KA = 0 + KB = 0 + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + KA = 0 + KB = 0 + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + KA = MAX( 0, N-1 ) + KB = KA + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + KA = 0 + KB = 0 + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + KA = MAX( 0, N-1 ) + KB = KA + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Hermitian banded, eigenvalues specified +* +* The following values are used for the half-bandwidths: +* +* ka = 1 kb = 1 +* ka = 2 kb = 1 +* ka = 2 kb = 2 +* ka = 3 kb = 1 +* ka = 3 kb = 2 +* ka = 3 kb = 3 +* + KB9 = KB9 + 1 + IF( KB9.GT.KA9 ) THEN + KA9 = KA9 + 1 + KB9 = 1 + END IF + KA = MAX( 0, MIN( N-1, KA9 ) ) + KB = MAX( 0, MIN( N-1, KB9 ) ) + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, KA, KA, 'N', A, LDA, WORK, IINFO ) +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 90 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) Call CHEGV, CHPGV, CHBGV, CHEGVD, CHPGVD, CHBGVD, +* CHEGVX, CHPGVX and CHBGVX, do tests. +* +* loop over the three generalized problems +* IBTYPE = 1: A*x = (lambda)*B*x +* IBTYPE = 2: A*B*x = (lambda)*x +* IBTYPE = 3: B*A*x = (lambda)*x +* + DO 630 IBTYPE = 1, 3 +* +* loop over the setting UPLO +* + DO 620 IBUPLO = 1, 2 + IF( IBUPLO.EQ.1 ) + $ UPLO = 'U' + IF( IBUPLO.EQ.2 ) + $ UPLO = 'L' +* +* Generate random well-conditioned positive definite +* matrix B, of bandwidth not greater than that of A. +* + CALL CLATMS( N, N, 'U', ISEED, 'P', RWORK, 5, TEN, + $ ONE, KB, KB, UPLO, B, LDB, WORK( N+1 ), + $ IINFO ) +* +* Test CHEGV +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL CHEGV( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test CHEGV_2STAGE +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL CHEGV_2STAGE( IBTYPE, 'N', UPLO, N, Z, LDZ, + $ BB, LDB, D2, WORK, NWORK, RWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEGV_2STAGE(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* +C CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, +C $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Do Tests | D1 - D2 | / ( |D1| ulp ) +* D1 computed using the standard 1-stage reduction as reference +* D2 computed using the 2-stage reduction +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D( J )-D2( J ) ) ) + 151 CONTINUE +* + RESULT( NTEST ) = TEMP2 / + $ MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Test CHEGVD +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL CHEGVD( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, RWORK, LRWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test CHEGVX +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL CHEGVX( IBTYPE, 'V', 'A', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* +* since we do not know the exact eigenvalues of this +* eigenpair, we just set VL and VU as constants. +* It is quite possible that there are no eigenvalues +* in this interval. +* + VL = ZERO + VU = ANORM + CALL CHEGVX( IBTYPE, 'V', 'V', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEGVX(V,V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL CLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL CLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL CHEGVX( IBTYPE, 'V', 'I', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEGVX(V,I,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + 100 CONTINUE +* +* Test CHPGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 120 J = 1, N + DO 110 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 110 CONTINUE + 120 CONTINUE + ELSE + IJ = 1 + DO 140 J = 1, N + DO 130 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 130 CONTINUE + 140 CONTINUE + END IF +* + CALL CHPGV( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test CHPGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 160 J = 1, N + DO 150 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 150 CONTINUE + 160 CONTINUE + ELSE + IJ = 1 + DO 180 J = 1, N + DO 170 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 170 CONTINUE + 180 CONTINUE + END IF +* + CALL CHPGVD( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, NWORK, RWORK, LRWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test CHPGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 200 J = 1, N + DO 190 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 190 CONTINUE + 200 CONTINUE + ELSE + IJ = 1 + DO 220 J = 1, N + DO 210 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 210 CONTINUE + 220 CONTINUE + END IF +* + CALL CHPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 240 J = 1, N + DO 230 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 230 CONTINUE + 240 CONTINUE + ELSE + IJ = 1 + DO 260 J = 1, N + DO 250 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 250 CONTINUE + 260 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL CHPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,V' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 280 J = 1, N + DO 270 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 270 CONTINUE + 280 CONTINUE + ELSE + IJ = 1 + DO 300 J = 1, N + DO 290 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 290 CONTINUE + 300 CONTINUE + END IF +* + CALL CHPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPGVX(V,I' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + 310 CONTINUE +* + IF( IBTYPE.EQ.1 ) THEN +* +* TEST CHBGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 340 J = 1, N + DO 320 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 320 CONTINUE + DO 330 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 330 CONTINUE + 340 CONTINUE + ELSE + DO 370 J = 1, N + DO 350 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 350 CONTINUE + DO 360 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 360 CONTINUE + 370 CONTINUE + END IF +* + CALL CHBGV( 'V', UPLO, N, KA, KB, AB, LDA, BB, LDB, + $ D, Z, LDZ, WORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBGV(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* TEST CHBGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 400 J = 1, N + DO 380 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 380 CONTINUE + DO 390 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 390 CONTINUE + 400 CONTINUE + ELSE + DO 430 J = 1, N + DO 410 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 410 CONTINUE + DO 420 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 420 CONTINUE + 430 CONTINUE + END IF +* + CALL CHBGVD( 'V', UPLO, N, KA, KB, AB, LDA, BB, + $ LDB, D, Z, LDZ, WORK, NWORK, RWORK, + $ LRWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBGVD(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test CHBGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 460 J = 1, N + DO 440 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 440 CONTINUE + DO 450 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 450 CONTINUE + 460 CONTINUE + ELSE + DO 490 J = 1, N + DO 470 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 470 CONTINUE + DO 480 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 480 CONTINUE + 490 CONTINUE + END IF +* + CALL CHBGVX( 'V', 'A', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBGVX(V,A' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 520 J = 1, N + DO 500 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 500 CONTINUE + DO 510 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 510 CONTINUE + 520 CONTINUE + ELSE + DO 550 J = 1, N + DO 530 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 530 CONTINUE + DO 540 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 540 CONTINUE + 550 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL CHBGVX( 'V', 'V', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBGVX(V,V' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 580 J = 1, N + DO 560 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 560 CONTINUE + DO 570 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 570 CONTINUE + 580 CONTINUE + ELSE + DO 610 J = 1, N + DO 590 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 590 CONTINUE + DO 600 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 600 CONTINUE + 610 CONTINUE + END IF +* + CALL CHBGVX( 'V', 'I', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBGVX(V,I' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL CSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + END IF +* + 620 CONTINUE + 630 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL SLAFTS( 'CSG', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) + 640 CONTINUE + 650 CONTINUE +* +* Summary +* + CALL SLASUM( 'CSG', NOUNIT, NERRS, NTESTT ) +* + RETURN +* + 9999 FORMAT( ' CDRVSG2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* +* End of CDRVSG2STG +* + END diff --git a/TESTING/EIG/cdrvst2stg.f b/TESTING/EIG/cdrvst2stg.f new file mode 100644 index 00000000..ab1af355 --- /dev/null +++ b/TESTING/EIG/cdrvst2stg.f @@ -0,0 +1,2118 @@ +*> \brief \b CDRVST2STG +* +* @generated from zdrvst2stg.f, fortran z -> c, Sat Nov 5 23:41:02 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, D1, D2, D3, WA1, WA2, WA3, U, +* LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, +* $ NSIZES, NTYPES +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL D1( * ), D2( * ), D3( * ), RESULT( * ), +* $ RWORK( * ), WA1( * ), WA2( * ), WA3( * ) +* COMPLEX A( LDA, * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CDRVST2STG checks the Hermitian eigenvalue problem drivers. +*> +*> CHEEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix, +*> using a divide-and-conquer algorithm. +*> +*> CHEEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix. +*> +*> CHEEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix +*> using the Relatively Robust Representation where it can. +*> +*> CHPEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage, using a divide-and-conquer algorithm. +*> +*> CHPEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage. +*> +*> CHBEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix, +*> using a divide-and-conquer algorithm. +*> +*> CHBEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix. +*> +*> CHEEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix. +*> +*> CHPEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage. +*> +*> CHBEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix. +*> +*> When CDRVST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the appropriate drivers. For each matrix and each +*> driver routine called, the following tests will be performed: +*> +*> (1) | A - Z D Z' | / ( |A| n ulp ) +*> +*> (2) | I - Z Z' | / ( n ulp ) +*> +*> (3) | D1 - D2 | / ( |D1| ulp ) +*> +*> where Z is the matrix of eigenvectors returned when the +*> eigenvector option is given and D1 and D2 are the eigenvalues +*> returned with and without the eigenvector option. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) A band matrix with half bandwidth randomly chosen between +*> 0 and N-1, with evenly spaced eigenvalues 1, ..., ULP +*> with random signs. +*> (17) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (18) Same as (16), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> CDRVST2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, CDRVST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to CDRVST2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A COMPLEX array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D1 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by CSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> Modified. +*> +*> D2 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by CSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> Modified. +*> +*> D3 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by SSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> Modified. +*> +*> WA1 REAL array, dimension +*> +*> WA2 REAL array, dimension +*> +*> WA3 REAL array, dimension +*> +*> U COMPLEX array, dimension (LDU, max(NN)) +*> The unitary matrix computed by CHETRD + CUNGC3. +*> Modified. +*> +*> LDU INTEGER +*> The leading dimension of U, Z, and V. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> V COMPLEX array, dimension (LDU, max(NN)) +*> The Housholder vectors computed by CHETRD in reducing A to +*> tridiagonal form. +*> Modified. +*> +*> TAU COMPLEX array, dimension (max(NN)) +*> The Householder factors computed by CHETRD in reducing A +*> to tridiagonal form. +*> Modified. +*> +*> Z COMPLEX array, dimension (LDU, max(NN)) +*> The unitary matrix of eigenvectors computed by CHEEVD, +*> CHEEVX, CHPEVD, CHPEVX, CHBEVD, and CHBEVX. +*> Modified. +*> +*> WORK - COMPLEX array of dimension ( LWORK ) +*> Workspace. +*> Modified. +*> +*> LWORK - INTEGER +*> The number of entries in WORK. This must be at least +*> 2*max( NN(j), 2 )**2. +*> Not modified. +*> +*> RWORK REAL array, dimension (3*max(NN)) +*> Workspace. +*> Modified. +*> +*> LRWORK - INTEGER +*> The number of entries in RWORK. +*> +*> IWORK INTEGER array, dimension (6*max(NN)) +*> Workspace. +*> Modified. +*> +*> LIWORK - INTEGER +*> The number of entries in IWORK. +*> +*> RESULT REAL array, dimension (??) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDU < 1 or LDU < NMAX. +*> -21: LWORK too small. +*> If SLATMR, SLATMS, CHETRD, SORGC3, CSTEQR, SSTERF, +*> or SORMC2 returns an error code, the +*> absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by SLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, D1, D2, D3, WA1, WA2, WA3, U, + $ LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, + $ NSIZES, NTYPES + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL D1( * ), D2( * ), D3( * ), RESULT( * ), + $ RWORK( * ), WA1( * ), WA2( * ), WA3( * ) + COMPLEX A( LDA, * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* +* .. Parameters .. + REAL ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ TEN = 10.0E+0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 18 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IDIAG, IHBW, IINFO, IL, IMODE, INDWRK, INDX, + $ IROW, ITEMP, ITYPE, IU, IUPLO, J, J1, J2, JCOL, + $ JSIZE, JTYPE, KD, LGN, LIWEDC, LRWEDC, LWEDC, + $ M, M2, M3, MTYPES, N, NERRS, NMATS, NMAX, + $ NTEST, NTESTT + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, ULP, ULPINV, UNFL, + $ VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ ISEED3( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + REAL SLAMCH, SLARND, SSXT1 + EXTERNAL SLAMCH, SLARND, SSXT1 +* .. +* .. External Subroutines .. + EXTERNAL ALASVM, SLABAD, SLAFTS, XERBLA, CHBEV, CHBEVD, + $ CHBEVX, CHEEV, CHEEVD, CHEEVR, CHEEVX, CHET21, + $ CHET22, CHPEV, CHPEVD, CHPEVX, CLACPY, CLASET, + $ CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, + $ CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, + $ CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, + $ CHETRD_SB2ST, CLATMR, CLATMS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 3*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 4, 4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -22 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CDRVST2STG', -INFO ) + RETURN + END IF +* +* Quick return if nothing to do +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = SLAMCH( 'Overflow' ) + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + ISEED3( I ) = ISEED( I ) + 20 CONTINUE +* + NERRS = 0 + NMATS = 0 +* + DO 1220 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = MAX( 2*N+N*N, 2*N*N ) + LRWEDC = 1 + 4*N + 2*N*LGN + 3*N**2 + LIWEDC = 3 + 5*N + ELSE + LWEDC = 2 + LRWEDC = 8 + LIWEDC = 8 + END IF + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 1210 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 1210 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log Hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random Hermitian +* =9 band Hermitian, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 110 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL CLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Hermitian banded, eigenvalues specified +* + IHBW = INT( ( N-1 )*SLARND( 1, ISEED3 ) ) + CALL CLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, IHBW, IHBW, 'Z', U, LDU, WORK, + $ IINFO ) +* +* Store as dense matrix for most routines. +* + CALL CLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + DO 100 IDIAG = -IHBW, IHBW + IROW = IHBW - IDIAG + 1 + J1 = MAX( 1, IDIAG+1 ) + J2 = MIN( N, N+IDIAG ) + DO 90 J = J1, J2 + I = J - IDIAG + A( I, J ) = U( IROW, J ) + 90 CONTINUE + 100 CONTINUE + ELSE + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 110 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* Perform tests storing upper or lower triangular +* part of matrix. +* + DO 1200 IUPLO = 0, 1 + IF( IUPLO.EQ.0 ) THEN + UPLO = 'L' + ELSE + UPLO = 'U' + END IF +* +* Call CHEEVD and CHEEVX. +* + CALL CLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + CALL CHEEVD( 'V', UPLO, N, A, LDU, D1, WORK, LWEDC, + $ RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 130 + END IF + END IF +* +* Do tests 1 and 2. +* + CALL CHET21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + CALL CHEEVD_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, + $ LWORK, RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 130 + END IF + END IF +* +* Do test 3. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 120 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 120 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 130 CONTINUE + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + CALL CHEEVX( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 4 and 5. +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL CHEEVX_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do test 6. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 140 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 140 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 150 CONTINUE + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + CALL CHEEVX( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 160 + END IF + END IF +* +* Do tests 7 and 8. +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + CALL CHEEVX_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 160 + END IF + END IF +* +* Do test 9. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 160 CONTINUE + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + CALL CHEEVX( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF + END IF +* +* Do tests 10 and 11. +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + CALL CHEEVX_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF +* +* Do test 12. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 170 CONTINUE +* +* Call CHPEVD and CHPEVX. +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 190 J = 1, N + DO 180 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 180 CONTINUE + 190 CONTINUE + ELSE + INDX = 1 + DO 210 J = 1, N + DO 200 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 200 CONTINUE + 210 CONTINUE + END IF +* + NTEST = NTEST + 1 + INDWRK = N*( N+1 ) / 2 + 1 + CALL CHPEVD( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDWRK ), LWEDC, RWORK, LRWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do tests 13 and 14. +* + CALL CHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 230 J = 1, N + DO 220 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + INDX = 1 + DO 250 J = 1, N + DO 240 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 240 CONTINUE + 250 CONTINUE + END IF +* + NTEST = NTEST + 2 + INDWRK = N*( N+1 ) / 2 + 1 + CALL CHPEVD( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDWRK ), LWEDC, RWORK, LRWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVD(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 15. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 260 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array WORK with the upper or lower triangular part +* of the matrix in packed form. +* + 270 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 290 J = 1, N + DO 280 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 280 CONTINUE + 290 CONTINUE + ELSE + INDX = 1 + DO 310 J = 1, N + DO 300 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 300 CONTINUE + 310 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + CALL CHPEVX( 'V', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 370 + END IF + END IF +* +* Do tests 16 and 17. +* + CALL CHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 330 J = 1, N + DO 320 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 320 CONTINUE + 330 CONTINUE + ELSE + INDX = 1 + DO 350 J = 1, N + DO 340 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 340 CONTINUE + 350 CONTINUE + END IF +* + CALL CHPEVX( 'N', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 370 + END IF + END IF +* +* Do test 18. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 360 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 360 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 370 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 390 J = 1, N + DO 380 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 380 CONTINUE + 390 CONTINUE + ELSE + INDX = 1 + DO 410 J = 1, N + DO 400 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 400 CONTINUE + 410 CONTINUE + END IF +* + CALL CHPEVX( 'V', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 460 + END IF + END IF +* +* Do tests 19 and 20. +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 430 J = 1, N + DO 420 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 420 CONTINUE + 430 CONTINUE + ELSE + INDX = 1 + DO 450 J = 1, N + DO 440 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 440 CONTINUE + 450 CONTINUE + END IF +* + CALL CHPEVX( 'N', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 460 + END IF + END IF +* +* Do test 21. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 460 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 480 J = 1, N + DO 470 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 470 CONTINUE + 480 CONTINUE + ELSE + INDX = 1 + DO 500 J = 1, N + DO 490 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 490 CONTINUE + 500 CONTINUE + END IF +* + CALL CHPEVX( 'V', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 550 + END IF + END IF +* +* Do tests 22 and 23. +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 520 J = 1, N + DO 510 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 510 CONTINUE + 520 CONTINUE + ELSE + INDX = 1 + DO 540 J = 1, N + DO 530 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 530 CONTINUE + 540 CONTINUE + END IF +* + CALL CHPEVX( 'N', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEVX(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 550 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 550 + END IF +* +* Do test 24. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 550 CONTINUE +* +* Call CHBEVD and CHBEVX. +* + IF( JTYPE.LE.7 ) THEN + KD = 0 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 570 J = 1, N + DO 560 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 560 CONTINUE + 570 CONTINUE + ELSE + DO 590 J = 1, N + DO 580 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 580 CONTINUE + 590 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL CHBEVD( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ LWEDC, RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CHBEVD(V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 650 + END IF + END IF +* +* Do tests 25 and 26. +* + CALL CHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 610 J = 1, N + DO 600 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 600 CONTINUE + 610 CONTINUE + ELSE + DO 630 J = 1, N + DO 620 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 620 CONTINUE + 630 CONTINUE + END IF +* + NTEST = NTEST + 2 + CALL CHBEVD_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, + $ Z, LDU, WORK, LWORK, RWORK, + $ LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'CHBEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 650 + END IF + END IF +* +* Do test 27. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 640 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 640 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 650 CONTINUE + IF( IUPLO.EQ.1 ) THEN + DO 670 J = 1, N + DO 660 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 660 CONTINUE + 670 CONTINUE + ELSE + DO 690 J = 1, N + DO 680 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 680 CONTINUE + 690 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL CHBEVX( 'V', 'A', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M, WA1, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHBEVX(V,A,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 750 + END IF + END IF +* +* Do tests 28 and 29. +* + CALL CHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 710 J = 1, N + DO 700 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 700 CONTINUE + 710 CONTINUE + ELSE + DO 730 J = 1, N + DO 720 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 720 CONTINUE + 730 CONTINUE + END IF +* + CALL CHBEVX_2STAGE( 'N', 'A', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'CHBEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 750 + END IF + END IF +* +* Do test 30. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 740 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 740 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 750 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 770 J = 1, N + DO 760 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 760 CONTINUE + 770 CONTINUE + ELSE + DO 790 J = 1, N + DO 780 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 780 CONTINUE + 790 CONTINUE + END IF +* + CALL CHBEVX( 'V', 'I', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CHBEVX(V,I,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 840 + END IF + END IF +* +* Do tests 31 and 32. +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 810 J = 1, N + DO 800 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 800 CONTINUE + 810 CONTINUE + ELSE + DO 830 J = 1, N + DO 820 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 820 CONTINUE + 830 CONTINUE + END IF + CALL CHBEVX_2STAGE( 'N', 'I', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'CHBEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 840 + END IF + END IF +* +* Do test 33. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 840 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 860 J = 1, N + DO 850 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 850 CONTINUE + 860 CONTINUE + ELSE + DO 880 J = 1, N + DO 870 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 870 CONTINUE + 880 CONTINUE + END IF + CALL CHBEVX( 'V', 'V', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CHBEVX(V,V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 930 + END IF + END IF +* +* Do tests 34 and 35. +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 900 J = 1, N + DO 890 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 890 CONTINUE + 900 CONTINUE + ELSE + DO 920 J = 1, N + DO 910 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 910 CONTINUE + 920 CONTINUE + END IF + CALL CHBEVX_2STAGE( 'N', 'V', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'CHBEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 930 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 930 + END IF +* +* Do test 36. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 930 CONTINUE +* +* Call CHEEV +* + CALL CLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + CALL CHEEV( 'V', UPLO, N, A, LDU, D1, WORK, LWORK, RWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 950 + END IF + END IF +* +* Do tests 37 and 38 +* + CALL CHET21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + CALL CHEEV_2STAGE( 'N', UPLO, N, A, LDU, D3, + $ WORK, LWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 950 + END IF + END IF +* +* Do test 39 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 940 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 940 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 950 CONTINUE +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Call CHPEV +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 970 J = 1, N + DO 960 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 960 CONTINUE + 970 CONTINUE + ELSE + INDX = 1 + DO 990 J = 1, N + DO 980 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 980 CONTINUE + 990 CONTINUE + END IF +* + NTEST = NTEST + 1 + INDWRK = N*( N+1 ) / 2 + 1 + CALL CHPEV( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDWRK ), RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1050 + END IF + END IF +* +* Do tests 40 and 41. +* + CALL CHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1010 J = 1, N + DO 1000 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1000 CONTINUE + 1010 CONTINUE + ELSE + INDX = 1 + DO 1030 J = 1, N + DO 1020 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1020 CONTINUE + 1030 CONTINUE + END IF +* + NTEST = NTEST + 2 + INDWRK = N*( N+1 ) / 2 + 1 + CALL CHPEV( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDWRK ), RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHPEV(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1050 + END IF + END IF +* +* Do test 42 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1040 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1040 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1050 CONTINUE +* +* Call CHBEV +* + IF( JTYPE.LE.7 ) THEN + KD = 0 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1070 J = 1, N + DO 1060 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1060 CONTINUE + 1070 CONTINUE + ELSE + DO 1090 J = 1, N + DO 1080 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1080 CONTINUE + 1090 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL CHBEV( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'CHBEV(V,' // UPLO // ')', + $ IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1140 + END IF + END IF +* +* Do tests 43 and 44. +* + CALL CHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1110 J = 1, N + DO 1100 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1100 CONTINUE + 1110 CONTINUE + ELSE + DO 1130 J = 1, N + DO 1120 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1120 CONTINUE + 1130 CONTINUE + END IF +* + NTEST = NTEST + 2 + CALL CHBEV_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'CHBEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1140 + END IF + END IF +* + 1140 CONTINUE +* +* Do test 45. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1150 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + CALL CLACPY( ' ', N, N, A, LDA, V, LDU ) + NTEST = NTEST + 1 + CALL CHEEVR( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVR(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1170 + END IF + END IF +* +* Do tests 45 and 46 (or ... ) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL CHEEVR_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVR_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1170 + END IF + END IF +* +* Do test 47 (or ... ) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1160 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 1160 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1170 CONTINUE +* + NTEST = NTEST + 1 + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL CHEEVR( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVR(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do tests 48 and 49 (or +??) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL CHEEVR_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVR_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do test 50 (or +??) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 1180 CONTINUE +* + NTEST = NTEST + 1 + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL CHEEVR( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'CHEEVR(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1190 + END IF + END IF +* +* Do tests 51 and 52 (or +??) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL CHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL CHEEVR_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'CHEEVR_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1190 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1190 + END IF +* +* Do test 52 (or +??) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + CALL CLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* +* +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 1190 CONTINUE +* + 1200 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL SLAFTS( 'CST', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) +* + 1210 CONTINUE + 1220 CONTINUE +* +* Summary +* + CALL ALASVM( 'CST', NOUNIT, NERRS, NTESTT, 0 ) +* + 9999 FORMAT( ' CDRVST2STG: ', A, ' returned INFO=', I6, / 9X, 'N=', I6, + $ ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + 9998 FORMAT( ' CDRVST2STG: ', A, ' returned INFO=', I6, / 9X, 'N=', I6, + $ ', KD=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, + $ ')' ) +* + RETURN +* +* End of CDRVST2STG +* + END diff --git a/TESTING/EIG/cerrst.f b/TESTING/EIG/cerrst.f index 14e4bfbe..c15bf5f4 100644 --- a/TESTING/EIG/cerrst.f +++ b/TESTING/EIG/cerrst.f @@ -25,6 +25,10 @@ *> CUNGTR, CUPMTR, CSTEQR, CSTEIN, CPTEQR, CHBTRD, *> CHEEV, CHEEVX, CHEEVD, CHBEV, CHBEVX, CHBEVD, *> CHPEV, CHPEVX, CHPEVD, and CSTEDC. +*> CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, +*> CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, +*> CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, +*> CHETRD_SB2ST *> \endverbatim * * Arguments: @@ -93,7 +97,11 @@ EXTERNAL CHBEV, CHBEVD, CHBEVX, CHBTRD, CHEEV, CHEEVD, $ CHEEVR, CHEEVX, CHETRD, CHKXER, CHPEV, CHPEVD, $ CHPEVX, CHPTRD, CPTEQR, CSTEDC, CSTEIN, CSTEQR, - $ CUNGTR, CUNMTR, CUPGTR, CUPMTR + $ CUNGTR, CUNMTR, CUPGTR, CUPMTR, + $ CHEEVD_2STAGE, CHEEVR_2STAGE, CHEEVX_2STAGE, + $ CHEEV_2STAGE, CHBEV_2STAGE, CHBEVD_2STAGE, + $ CHBEVX_2STAGE, CHETRD_2STAGE, CHETRD_SY2SB, + $ CHETRD_SB2ST * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -151,6 +159,103 @@ CALL CHKXER( 'CHETRD', INFOT, NOUT, LERR, OK ) NT = NT + 4 * +* CHETRD_2STAGE +* + SRNAMT = 'CHETRD_2STAGE' + INFOT = 1 + CALL CHETRD_2STAGE( '/', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHETRD_2STAGE( 'H', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_2STAGE( 'N', '/', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHETRD_2STAGE( 'N', 'U', -1, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHETRD_2STAGE( 'N', 'U', 2, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHETRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL CHETRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'CHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 7 +* +* CHETRD_HE2HB +* + SRNAMT = 'CHETRD_HE2HB' + INFOT = 1 + CALL CHETRD_HE2HB( '/', 0, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_HE2HB( 'U', -1, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHETRD_HE2HB( 'U', 0, -1, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHETRD_HE2HB( 'U', 2, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHETRD_HE2HB( 'U', 0, 2, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHETRD_HE2HB( 'U', 0, 0, A, 1, C, 1, TAU, W, 0, INFO ) + CALL CHKXER( 'CHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* +* CHETRD_HB2ST +* + SRNAMT = 'CHETRD_HB2ST' + INFOT = 1 + CALL CHETRD_HB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_HB2ST( 'Y', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_HB2ST( 'Y', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHETRD_HB2ST( 'Y', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHETRD_HB2ST( 'Y', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHETRD_HB2ST( 'Y', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHETRD_HB2ST( 'Y', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHETRD_HB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CHETRD_HB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * CUNGTR * SRNAMT = 'CUNGTR' @@ -377,6 +482,63 @@ CALL CHKXER( 'CHEEVD', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* CHEEVD_2STAGE +* + SRNAMT = 'CHEEVD_2STAGE' + INFOT = 1 + CALL CHEEVD_2STAGE( '/', 'U', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHEEVD_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHEEVD_2STAGE( 'N', '/', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHEEVD_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHEEVD_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, + $ RW, 2, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 0, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CHEEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 2, + $ RW, 2, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 8 +* CALL CHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 3, +* $ RW, 25, IW, 12, INFO ) +* CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, + $ RW, 0, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHEEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 25, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 10 +* CALL CHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 8, +* $ RW, 18, IW, 12, INFO ) +* CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL CHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, + $ RW, 1, IW, 0, INFO ) + CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 +* CALL CHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 8, +* $ RW, 25, IW, 11, INFO ) +* CALL CHKXER( 'CHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 10 +* * CHEEV * SRNAMT = 'CHEEV ' @@ -397,6 +559,29 @@ CALL CHKXER( 'CHEEV ', INFOT, NOUT, LERR, OK ) NT = NT + 5 * +* CHEEV_2STAGE +* + SRNAMT = 'CHEEV_2STAGE ' + INFOT = 1 + CALL CHEEV_2STAGE( '/', 'U', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHEEV_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHEEV_2STAGE( 'N', '/', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHEEV_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHEEV_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CHEEV_2STAGE( 'N', 'U', 2, A, 2, X, W, 2, RW, INFO ) + CALL CHKXER( 'CHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* * CHEEVX * SRNAMT = 'CHEEVX' @@ -441,6 +626,65 @@ CALL CHKXER( 'CHEEVX', INFOT, NOUT, LERR, OK ) NT = NT + 10 * +* CHEEVX_2STAGE +* + SRNAMT = 'CHEEVX_2STAGE' + INFOT = 1 + CALL CHEEVX_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHEEVX_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHEEVX_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 1.0D0, 1, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHEEVX_2STAGE( 'N', 'A', '/', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + INFOT = 4 + CALL CHEEVX_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CHEEVX_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL CHEEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHEEVX_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, X, Z, 2, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL CHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 0, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 17 + CALL CHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 0, RW, IW, I1, INFO ) + CALL CHKXER( 'CHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 11 +* * CHEEVR * SRNAMT = 'CHEEVR' @@ -508,6 +752,90 @@ CALL CHKXER( 'CHEEVR', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* CHEEVR_2STAGE +* + SRNAMT = 'CHEEVR_2STAGE' + N = 1 + INFOT = 1 + CALL CHEEVR_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHEEVR_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHEEVR_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHEEVR_2STAGE( 'N', 'A', '/', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, + $ IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHEEVR_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, + $ IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CHEEVR_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL CHEEVR_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 0, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N-1, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, RW, 24*N-1, IW( 2*N-1 ), + $ 10*N, INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 22 + CALL CHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, RW, 24*N, IW, 10*N-1, + $ INFO ) + CALL CHKXER( 'CHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * CHPEVD * SRNAMT = 'CHPEVD' @@ -646,6 +974,47 @@ CALL CHKXER( 'CHBTRD', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* CHETRD_HB2ST +* + SRNAMT = 'CHETRD_HB2ST' + INFOT = 1 + CALL CHETRD_HB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_HB2ST( 'N', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHETRD_HB2ST( 'N', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHETRD_HB2ST( 'N', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHETRD_HB2ST( 'N', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHETRD_HB2ST( 'N', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHETRD_HB2ST( 'N', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHETRD_HB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CHETRD_HB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'CHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * CHBEVD * SRNAMT = 'CHBEVD' @@ -711,6 +1080,75 @@ CALL CHKXER( 'CHBEVD', INFOT, NOUT, LERR, OK ) NT = NT + 15 * +* CHBEVD_2STAGE +* + SRNAMT = 'CHBEVD_2STAGE' + INFOT = 1 + CALL CHBEVD_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHBEVD_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHBEVD_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHBEVD_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHBEVD_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CHBEVD_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, + $ W, 2, RW, 2, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL CHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 0, + $ W, 8, RW, 25, IW, 12, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 0, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 1, RW, 2, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 11 +* CALL CHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 2, RW, 25, IW, 12, INFO ) +* CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 0, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 25, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 13 +* CALL CHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 25, RW, 2, IW, 12, INFO ) +* CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL CHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 0, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL CHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 25, RW, 2, IW, 0, INFO ) + CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 15 +* CALL CHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 25, RW, 25, IW, 2, INFO ) +* CALL CHKXER( 'CHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * CHBEV * SRNAMT = 'CHBEV ' @@ -734,6 +1172,43 @@ CALL CHKXER( 'CHBEV ', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* CHBEV_2STAGE +* + SRNAMT = 'CHBEV_2STAGE ' + INFOT = 1 + CALL CHBEV_2STAGE( '/', 'U', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL CHBEV_2STAGE( 'V', 'U', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHBEV_2STAGE( 'N', '/', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHBEV_2STAGE( 'N', 'U', -1, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL CHBEV_2STAGE( 'N', 'U', 0, -1, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL CHBEV_2STAGE( 'N', 'U', 2, 1, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL CHBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, + $ Z, 0, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'CHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * CHBEVX * SRNAMT = 'CHBEVX' @@ -781,6 +1256,74 @@ $ 0.0, M, X, Z, 1, W, RW, IW, I3, INFO ) CALL CHKXER( 'CHBEVX', INFOT, NOUT, LERR, OK ) NT = NT + 11 +* +* CHBEVX_2STAGE +* + SRNAMT = 'CHBEVX_2STAGE' + INFOT = 1 + CALL CHBEVX_2STAGE( '/', 'A', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + INFOT = 1 + CALL CHBEVX_2STAGE( 'V', 'A', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL CHBEVX_2STAGE( 'N', '/', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 1.0D0, 1, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL CHBEVX_2STAGE( 'N', 'A', '/', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + INFOT = 4 + CALL CHBEVX_2STAGE( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL CHBEVX_2STAGE( 'N', 'A', 'U', 0, -1, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL CHBEVX_2STAGE( 'N', 'A', 'U', 2, 1, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL CHBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 1, +* $ 0.0D0, 0.0D0, 0, 0, 0.0D0, +* $ M, X, Z, 2, W, 0, RW, IW, I3, INFO ) +* CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL CHBEVX_2STAGE( 'N', 'V', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL CHBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL CHBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 1, 2, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL CHBEVX_2STAGE( 'N', 'A', 'U', 2, 0, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 0, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL CHBEVX_2STAGE( 'N', 'A', 'U', 2, 0, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'CHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 12 END IF * * Print a summary line. diff --git a/TESTING/EIG/dchkee.f b/TESTING/EIG/dchkee.f index b723687a..4d342085 100644 --- a/TESTING/EIG/dchkee.f +++ b/TESTING/EIG/dchkee.f @@ -1106,7 +1106,8 @@ $ DDRGEV, DDRGSX, DDRGVX, DDRVBD, DDRVES, DDRVEV, $ DDRVSG, DDRVST, DDRVSX, DDRVVX, DERRBD, $ DERRED, DERRGG, DERRHS, DERRST, ILAVER, XLAENV, - $ DDRGES3, DDRGEV3 + $ DDRGES3, DDRGEV3, + $ DCHKST2STG, DDRVST2STG, DCHKSB2STG, DDRVSG2STG * .. * .. Intrinsic Functions .. INTRINSIC LEN, MIN @@ -1153,7 +1154,7 @@ PATH = LINE( 1: 3 ) NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'DHS' ) SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'DST' ) .OR. - $ LSAMEN( 3, PATH, 'DSG' ) + $ LSAMEN( 3, PATH, 'DSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) DEV = LSAMEN( 3, PATH, 'DEV' ) DES = LSAMEN( 3, PATH, 'DES' ) @@ -1839,7 +1840,8 @@ $ WRITE( NOUT, FMT = 9980 )'DCHKHS', INFO 270 CONTINUE * - ELSE IF( LSAMEN( 3, C3, 'DST' ) .OR. LSAMEN( 3, C3, 'SEP' ) ) THEN + ELSE IF( LSAMEN( 3, C3, 'DST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. LSAMEN( 3, C3, 'SE2' ) ) THEN * * ---------------------------------- * SEP: Symmetric Eigenvalue Problem @@ -1869,6 +1871,15 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL DCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL DCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), @@ -1876,16 +1887,26 @@ $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'DCHKST', INFO END IF IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL DDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL DDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, $ A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), $ D( 1, 10 ), D( 1, 11 ), A( 1, 2 ), NMAX, $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'DDRVST', INFO END IF @@ -1918,11 +1939,17 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN - CALL DDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) +* CALL DDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, IWORK, LIWORK, RESULT, INFO ) + CALL DDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'DDRVSG', INFO END IF @@ -2282,9 +2309,13 @@ CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) IF( TSTERR ) $ CALL DERRST( 'DSB', NOUT ) - CALL DCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) +* CALL DCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + CALL DCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'DCHKSB', INFO * diff --git a/TESTING/EIG/dchksb2stg.f b/TESTING/EIG/dchksb2stg.f new file mode 100644 index 00000000..adac168c --- /dev/null +++ b/TESTING/EIG/dchksb2stg.f @@ -0,0 +1,870 @@ +*> \brief \b DCHKSBSTG +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, +* ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, +* D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, +* $ NWDTHS +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), KK( * ), NN( * ) +* DOUBLE PRECISION A( LDA, * ), RESULT( * ), SD( * ), SE( * ), +* $ U( LDU, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DCHKSBSTG tests the reduction of a symmetric band matrix to tridiagonal +*> form, used with the symmetric eigenvalue problem. +*> +*> DSBTRD factors a symmetric band matrix A as U S U' , where ' means +*> transpose, S is symmetric tridiagonal, and U is orthogonal. +*> DSBTRD can use either just the lower or just the upper triangle +*> of A; DCHKSBSTG checks both cases. +*> +*> DSYTRD_SB2ST factors a symmetric band matrix A as U S U' , +*> where ' means transpose, S is symmetric tridiagonal, and U is +*> orthogonal. DSYTRD_SB2ST can use either just the lower or just +*> the upper triangle of A; DCHKSBSTG checks both cases. +*> +*> DSTEQR factors S as Z D1 Z'. +*> D1 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSBTRD "U" (used as reference for DSYTRD_SB2ST) +*> D2 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "U". +*> D3 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "L". +*> +*> When DCHKSBSTG is called, a number of matrix "sizes" ("n's"), a number +*> of bandwidths ("k's"), and a number of matrix "types" are +*> specified. For each size ("n"), each bandwidth ("k") less than or +*> equal to "n", and each type of matrix, one matrix will be generated +*> and used to test the symmetric banded reduction routine. For each +*> matrix, a number of tests will be performed: +*> +*> (1) | A - V S V' | / ( |A| n ulp ) computed by DSBTRD with +*> UPLO='U' +*> +*> (2) | I - UU' | / ( n ulp ) +*> +*> (3) | A - V S V' | / ( |A| n ulp ) computed by DSBTRD with +*> UPLO='L' +*> +*> (4) | I - UU' | / ( n ulp ) +*> +*> (5) | D1 - D2 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D2 is computed by +*> DSYTRD_SB2ST with UPLO='U' +*> +*> (6) | D1 - D3 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D3 is computed by +*> DSYTRD_SB2ST with UPLO='L' +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> DCHKSBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NWDTHS +*> \verbatim +*> NWDTHS is INTEGER +*> The number of bandwidths to use. If it is zero, +*> DCHKSBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] KK +*> \verbatim +*> KK is INTEGER array, dimension (NWDTHS) +*> An array containing the bandwidths to be used for the band +*> matrices. The values must be at least zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, DCHKSBSTG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to DCHKSBSTG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA, max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least 2 (not 1!) +*> and at least max( KK )+1. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is DOUBLE PRECISION array, dimension (max(NN)) +*> Used to hold the diagonal of the tridiagonal matrix computed +*> by DSBTRD. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is DOUBLE PRECISION array, dimension (max(NN)) +*> Used to hold the off-diagonal of the tridiagonal matrix +*> computed by DSBTRD. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, max(NN)) +*> Used to hold the orthogonal matrix computed by DSBTRD. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> max( LDA+1, max(NN)+1 )*max(NN). +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (4) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, + $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, + $ D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, + $ NWDTHS + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), KK( * ), NN( * ) + DOUBLE PRECISION A( LDA, * ), RESULT( * ), SD( * ), SE( * ), + $ D1( * ), D2( * ), D3( * ), + $ U( LDU, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ TEN = 10.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 15 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, BADNNB + INTEGER I, IINFO, IMODE, ITYPE, J, JC, JCOL, JR, JSIZE, + $ JTYPE, JWIDTH, K, KMAX, MTYPES, N, NERRS, + $ NMATS, NMAX, NTEST, NTESTT + DOUBLE PRECISION ANINV, ANORM, COND, OVFL, RTOVFL, RTUNFL, + $ TEMP1, TEMP2, TEMP3, TEMP4, ULP, ULPINV, UNFL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), KMAGN( MAXTYP ), + $ KMODE( MAXTYP ), KTYPE( MAXTYP ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASET, DLASUM, DLATMR, DLATMS, DSBT21, + $ DSBTRD, XERBLA, DSBTRD_SB2ST, DSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0 / +* .. +* .. Executable Statements .. +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + BADNNB = .FALSE. + KMAX = 0 + DO 20 J = 1, NSIZES + KMAX = MAX( KMAX, KK( J ) ) + IF( KK( J ).LT.0 ) + $ BADNNB = .TRUE. + 20 CONTINUE + KMAX = MIN( NMAX-1, KMAX ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NWDTHS.LT.0 ) THEN + INFO = -3 + ELSE IF( BADNNB ) THEN + INFO = -4 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.KMAX+1 ) THEN + INFO = -11 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -15 + ELSE IF( ( MAX( LDA, NMAX )+1 )*NMAX.GT.LWORK ) THEN + INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DCHKSBSTG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 .OR. NWDTHS.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 190 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + DO 180 JWIDTH = 1, NWDTHS + K = KK( JWIDTH ) + IF( K.GT.N ) + $ GO TO 180 + K = MAX( 0, MIN( N-1, K ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 170 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 170 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A". +* Store as "Upper"; later, we will copy to other format. +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( K+1, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'Q', A( K+1, 1 ), LDA, + $ WORK( N+1 ), IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, K, K, 'Q', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'Q', A( K+1, 1 ), LDA, + $ IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, K, K, + $ ZERO, ANORM, 'Q', A, LDA, IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL DLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, K, K, 'Q', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + IF( N.GT.1 ) + $ K = MAX( 1, K ) + CALL DLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, 1, 1, 'Q', A( K, 1 ), LDA, + $ WORK( N+1 ), IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( K, I ) ) / + $ SQRT( ABS( A( K+1, I-1 )*A( K+1, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( K, I ) = HALF*SQRT( ABS( A( K+1, + $ I-1 )*A( K+1, I ) ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call DSBTRD to compute S and U from upper triangle. +* + CALL DLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 1 + CALL DSBTRD( 'V', 'U', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBTRD(U)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL DSBT21( 'Upper', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RESULT( 1 ) ) +* +* Before converting A into lower for DSBTRD, run DSYTRD_SB2ST +* otherwise matrix A will be converted to lower and then need +* to be converted back to upper in order to run the upper case +* ofDSYTRD_SB2ST +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the DSBTRD and used as reference to compare +* with the DSYTRD_SB2ST routine +* +* Compute D1 from the DSBTRD and used as reference for the +* DSYTRD_SB2ST +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D1, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* DSYTRD_SB2ST Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL DLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL DSYTRD_SB2ST( 'N', 'N', "U", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the DSYTRD_SB2ST Upper case +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Convert A from Upper-Triangle-Only storage to +* Lower-Triangle-Only storage. +* + DO 120 JC = 1, N + DO 110 JR = 0, MIN( K, N-JC ) + A( JR+1, JC ) = A( K+1-JR, JC+JR ) + 110 CONTINUE + 120 CONTINUE + DO 140 JC = N + 1 - K, N + DO 130 JR = MIN( K, N-JC ) + 1, K + A( JR+1, JC ) = ZERO + 130 CONTINUE + 140 CONTINUE +* +* Call DSBTRD to compute S and U from lower triangle +* + CALL DLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 3 + CALL DSBTRD( 'V', 'L', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBTRD(L)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 150 + END IF + END IF + NTEST = 4 +* +* Do tests 3 and 4 +* + CALL DSBT21( 'Lower', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RESULT( 3 ) ) +* +* DSYTRD_SB2ST Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL DLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL DSYTRD_SB2ST( 'N', 'N', "L", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D3, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 150 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 6 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT(5) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT(6) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* End of Loop -- Check for RESULT(j) > THRESH +* + 150 CONTINUE + NTESTT = NTESTT + NTEST +* +* Print out tests which fail. +* + DO 160 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'DSB' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Symmetric' + WRITE( NOUNIT, FMT = 9994 )'orthogonal', '''', + $ 'transpose', ( '''', J = 1, 6 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9993 )N, K, IOLDSD, JTYPE, + $ JR, RESULT( JR ) + END IF + 160 CONTINUE +* + 170 CONTINUE + 180 CONTINUE + 190 CONTINUE +* +* Summary +* + CALL DLASUM( 'DSB', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' DCHKSBSTG: ', A, ' returned INFO=', I6, '.', / 9X, 'N=', + $ I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, + $ ' -- Real Symmetric Banded Tridiagonal Reduction Routines' ) + 9997 FORMAT( ' Matrix types (see DCHKSBSTG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Banded Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) +* + 9994 FORMAT( / ' Tests performed: (S is Tridiag, U is ', A, ',', + $ / 20X, A, ' means ', A, '.', / ' UPLO=''U'':', + $ / ' 1= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 2= | I - U U', A1, ' | / ( n ulp )', / ' UPLO=''L'':', + $ / ' 3= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 4= | I - U U', A1, ' | / ( n ulp )' / ' Eig check:', + $ /' 5= | D1 - D2', '', ' | / ( |D1| ulp ) ', + $ ' 6= | D1 - D3', '', ' | / ( |D1| ulp ) ' ) + 9993 FORMAT( ' N=', I5, ', K=', I4, ', seed=', 4( I4, ',' ), ' type ', + $ I2, ', test(', I2, ')=', G10.3 ) +* +* End of DCHKSBSTG +* + END diff --git a/TESTING/EIG/dchkst2stg.f b/TESTING/EIG/dchkst2stg.f new file mode 100644 index 00000000..29190691 --- /dev/null +++ b/TESTING/EIG/dchkst2stg.f @@ -0,0 +1,2120 @@ +*> \brief \b DCHKST2STG +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, +* WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, +* LWORK, IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, +* $ NTYPES +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION A( LDA, * ), AP( * ), D1( * ), D2( * ), +* $ D3( * ), D4( * ), D5( * ), RESULT( * ), +* $ SD( * ), SE( * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), VP( * ), WA1( * ), WA2( * ), +* $ WA3( * ), WORK( * ), WR( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DCHKST2STG checks the symmetric eigenvalue problem routines +*> using the 2-stage reduction techniques. Since the generation +*> of Q or the vectors is not available in this release, we only +*> compare the eigenvalue resulting when using the 2-stage to the +*> one considered as reference using the standard 1-stage reduction +*> DSYTRD. For that, we call the standard DSYTRD and compute D1 using +*> DSTEQR, then we call the 2-stage DSYTRD_2STAGE with Upper and Lower +*> and we compute D2 and D3 using DSTEQR and then we replaced tests +*> 3 and 4 by tests 11 and 12. test 1 and 2 remain to verify that +*> the 1-stage results are OK and can be trusted. +*> This testing routine will converge to the DCHKST in the next +*> release when vectors and generation of Q will be implemented. +*> +*> DSYTRD factors A as U S U' , where ' means transpose, +*> S is symmetric tridiagonal, and U is orthogonal. +*> DSYTRD can use either just the lower or just the upper triangle +*> of A; DCHKST2STG checks both cases. +*> U is represented as a product of Householder +*> transformations, whose vectors are stored in the first +*> n-1 columns of V, and whose scale factors are in TAU. +*> +*> DSPTRD does the same as DSYTRD, except that A and V are stored +*> in "packed" format. +*> +*> DORGTR constructs the matrix U from the contents of V and TAU. +*> +*> DOPGTR constructs the matrix U from the contents of VP and TAU. +*> +*> DSTEQR factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal. D2 is the matrix of +*> eigenvalues computed when Z is not computed. +*> +*> DSTERF computes D3, the matrix of eigenvalues, by the +*> PWK method, which does not yield eigenvectors. +*> +*> DPTEQR factors S as Z4 D4 Z4' , for a +*> symmetric positive definite tridiagonal matrix. +*> D5 is the matrix of eigenvalues computed when Z is not +*> computed. +*> +*> DSTEBZ computes selected eigenvalues. WA1, WA2, and +*> WA3 will denote eigenvalues computed to high +*> absolute accuracy, with different range options. +*> WR will denote eigenvalues computed to high relative +*> accuracy. +*> +*> DSTEIN computes Y, the eigenvectors of S, given the +*> eigenvalues. +*> +*> DSTEDC factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). It may also +*> update an input orthogonal matrix, usually the output +*> from DSYTRD/DORGTR or DSPTRD/DOPGTR ('V' option). It may +*> also just compute eigenvalues ('N' option). +*> +*> DSTEMR factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). DSTEMR +*> uses the Relatively Robust Representation whenever possible. +*> +*> When DCHKST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the symmetric eigenroutines. For each matrix, a number +*> of tests will be performed: +*> +*> (1) | A - V S V' | / ( |A| n ulp ) DSYTRD( UPLO='U', ... ) +*> +*> (2) | I - UV' | / ( n ulp ) DORGTR( UPLO='U', ... ) +*> +*> (3) | A - V S V' | / ( |A| n ulp ) DSYTRD( UPLO='L', ... ) +*> replaced by | D1 - D2 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D2 is the +*> eigenvalue matrix computed using S_2stage the output of +*> DSYTRD_2STAGE("N", "U",....). D1 and D2 are computed +*> via DSTEQR('N',...) +*> +*> (4) | I - UV' | / ( n ulp ) DORGTR( UPLO='L', ... ) +*> replaced by | D1 - D3 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D3 is the +*> eigenvalue matrix computed using S_2stage the output of +*> DSYTRD_2STAGE("N", "L",....). D1 and D3 are computed +*> via DSTEQR('N',...) +*> +*> (5-8) Same as 1-4, but for DSPTRD and DOPGTR. +*> +*> (9) | S - Z D Z' | / ( |S| n ulp ) DSTEQR('V',...) +*> +*> (10) | I - ZZ' | / ( n ulp ) DSTEQR('V',...) +*> +*> (11) | D1 - D2 | / ( |D1| ulp ) DSTEQR('N',...) +*> +*> (12) | D1 - D3 | / ( |D1| ulp ) DSTERF +*> +*> (13) 0 if the true eigenvalues (computed by sturm count) +*> of S are within THRESH of +*> those in D1. 2*THRESH if they are not. (Tested using +*> DSTECH) +*> +*> For S positive definite, +*> +*> (14) | S - Z4 D4 Z4' | / ( |S| n ulp ) DPTEQR('V',...) +*> +*> (15) | I - Z4 Z4' | / ( n ulp ) DPTEQR('V',...) +*> +*> (16) | D4 - D5 | / ( 100 |D4| ulp ) DPTEQR('N',...) +*> +*> When S is also diagonally dominant by the factor gamma < 1, +*> +*> (17) max | D4(i) - WR(i) | / ( |D4(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> DSTEBZ( 'A', 'E', ...) +*> +*> (18) | WA1 - D3 | / ( |D3| ulp ) DSTEBZ( 'A', 'E', ...) +*> +*> (19) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> DSTEBZ( 'I', 'E', ...) +*> +*> (20) | S - Y WA1 Y' | / ( |S| n ulp ) DSTEBZ, SSTEIN +*> +*> (21) | I - Y Y' | / ( n ulp ) DSTEBZ, SSTEIN +*> +*> (22) | S - Z D Z' | / ( |S| n ulp ) DSTEDC('I') +*> +*> (23) | I - ZZ' | / ( n ulp ) DSTEDC('I') +*> +*> (24) | S - Z D Z' | / ( |S| n ulp ) DSTEDC('V') +*> +*> (25) | I - ZZ' | / ( n ulp ) DSTEDC('V') +*> +*> (26) | D1 - D2 | / ( |D1| ulp ) DSTEDC('V') and +*> DSTEDC('N') +*> +*> Test 27 is disabled at the moment because DSTEMR does not +*> guarantee high relatvie accuracy. +*> +*> (27) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> DSTEMR('V', 'A') +*> +*> (28) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> DSTEMR('V', 'I') +*> +*> Tests 29 through 34 are disable at present because DSTEMR +*> does not handle partial specturm requests. +*> +*> (29) | S - Z D Z' | / ( |S| n ulp ) DSTEMR('V', 'I') +*> +*> (30) | I - ZZ' | / ( n ulp ) DSTEMR('V', 'I') +*> +*> (31) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> DSTEMR('N', 'I') vs. SSTEMR('V', 'I') +*> +*> (32) | S - Z D Z' | / ( |S| n ulp ) DSTEMR('V', 'V') +*> +*> (33) | I - ZZ' | / ( n ulp ) DSTEMR('V', 'V') +*> +*> (34) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> DSTEMR('N', 'V') vs. SSTEMR('V', 'V') +*> +*> (35) | S - Z D Z' | / ( |S| n ulp ) DSTEMR('V', 'A') +*> +*> (36) | I - ZZ' | / ( n ulp ) DSTEMR('V', 'A') +*> +*> (37) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> DSTEMR('N', 'A') vs. SSTEMR('V', 'A') +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) Same as (8), but diagonal elements are all positive. +*> (17) Same as (9), but diagonal elements are all positive. +*> (18) Same as (10), but diagonal elements are all positive. +*> (19) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (20) Same as (16), but multiplied by SQRT( underflow threshold ) +*> (21) A diagonally dominant tridiagonal matrix with geometrically +*> spaced diagonal entries 1, ..., ULP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> DCHKST2STG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, DCHKST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to DCHKST2STG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array of +*> dimension ( LDA , max(NN) ) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix A stored in packed format. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The diagonal of the tridiagonal matrix computed by DSYTRD. +*> On exit, SD and SE contain the tridiagonal form of the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The off-diagonal of the tridiagonal matrix computed by +*> DSYTRD. On exit, SD and SE contain the tridiagonal form of +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D1 +*> \verbatim +*> D1 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] D2 +*> \verbatim +*> D2 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D3 +*> \verbatim +*> D3 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D4 +*> \verbatim +*> D4 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DPTEQR(V). +*> DPTEQR factors S as Z4 D4 Z4* +*> On exit, the eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D5 +*> \verbatim +*> D5 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DPTEQR(N) +*> when Z is not computed. On exit, the +*> eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] WA1 +*> \verbatim +*> WA1 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> \endverbatim +*> +*> \param[out] WA2 +*> \verbatim +*> WA2 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> Choose random values for IL and IU, and ask for the +*> IL-th through IU-th eigenvalues. +*> \endverbatim +*> +*> \param[out] WA3 +*> \verbatim +*> WA3 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> Determine the values VL and VU of the IL-th and IU-th +*> eigenvalues and ask for all eigenvalues in this range. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different options. +*> as computed by DSTEBZ. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array of +*> dimension( LDU, max(NN) ). +*> The orthogonal matrix computed by DSYTRD + DORGTR. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U, Z, and V. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array of +*> dimension( LDU, max(NN) ). +*> The Housholder vectors computed by DSYTRD in reducing A to +*> tridiagonal form. The vectors computed with UPLO='U' are +*> in the upper triangle, and the vectors computed with UPLO='L' +*> are in the lower triangle. (As described in DSYTRD, the +*> sub- and superdiagonal are not set to 1, although the +*> true Householder vector has a 1 in that position. The +*> routines that use V, such as DORGTR, set those entries to +*> 1 before using them, and then restore them later.) +*> \endverbatim +*> +*> \param[out] VP +*> \verbatim +*> VP is DOUBLE PRECISION array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix V stored in packed format. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The Householder factors computed by DSYTRD in reducing A +*> to tridiagonal form. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array of +*> dimension( LDU, max(NN) ). +*> The orthogonal matrix of eigenvectors computed by DSTEQR, +*> DPTEQR, and DSTEIN. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array of +*> dimension( LWORK ) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 3 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> Workspace. +*> \endverbatim +*> +*> \param[out] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The number of entries in IWORK. This must be at least +*> 6 + 6*Nmax + 5 * Nmax * lg Nmax +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (26) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -23: LDU < 1 or LDU < NMAX. +*> -29: LWORK too small. +*> If DLATMR, SLATMS, DSYTRD, DORGTR, DSTEQR, SSTERF, +*> or DORMC2 returns an error code, the +*> absolute value of it is returned. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NBLOCK Blocksize as returned by ENVIR. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, + $ WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, + $ NTYPES + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION A( LDA, * ), AP( * ), D1( * ), D2( * ), + $ D3( * ), D4( * ), D5( * ), RESULT( * ), + $ SD( * ), SE( * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), VP( * ), WA1( * ), WA2( * ), + $ WA3( * ), WORK( * ), WR( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT, TEN, HUN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ EIGHT = 8.0D0, TEN = 10.0D0, HUN = 100.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) + LOGICAL SRANGE + PARAMETER ( SRANGE = .FALSE. ) + LOGICAL SREL + PARAMETER ( SREL = .FALSE. ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, TRYRAC + INTEGER I, IINFO, IL, IMODE, ITEMP, ITYPE, IU, J, JC, + $ JR, JSIZE, JTYPE, LGN, LIWEDC, LOG2UI, LWEDC, + $ M, M2, M3, MTYPES, N, NAP, NBLOCK, NERRS, + $ NMATS, NMAX, NSPLIT, NTEST, NTESTT, LH, LW + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, TEMP4, ULP, + $ ULPINV, UNFL, VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) + DOUBLE PRECISION DUMMA( 1 ) +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLARND, DSXT1 + EXTERNAL ILAENV, DLAMCH, DLARND, DSXT1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLABAD, DLACPY, DLASET, DLASUM, DLATMR, + $ DLATMS, DOPGTR, DORGTR, DPTEQR, DSPT21, DSPTRD, + $ DSTEBZ, DSTECH, DSTEDC, DSTEMR, DSTEIN, DSTEQR, + $ DSTERF, DSTT21, DSTT22, DSYT21, DSYTRD, XERBLA, + $ DSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, + $ 8, 8, 9, 9, 9, 9, 9, 10 / + DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 1, 1, 2, 3, 1 / + DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 3, 1, 4, 4, 3 / +* .. +* .. Executable Statements .. +* +* Keep ftnchek happy + IDUMMA( 1 ) = 1 +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + TRYRAC = .TRUE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + NBLOCK = ILAENV( 1, 'DSYTRD', 'L', NMAX, -1, -1, -1 ) + NBLOCK = MIN( NMAX, MAX( 1, NBLOCK ) ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -23 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -29 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DCHKST2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + LOG2UI = INT( LOG( ULPINV ) / LOG( TWO ) ) + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE + NERRS = 0 + NMATS = 0 +* + DO 310 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 + LIWEDC = 6 + 6*N + 5*N*LGN + ELSE + LWEDC = 8 + LIWEDC = 12 + END IF + NAP = ( N*( N+1 ) ) / 2 + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 300 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 300 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JC = 1, N + A( JC, JC ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL DLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + CALL DLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, 1, 1, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( I-1, I ) ) / + $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, + $ I ) ) ) + A( I, I-1 ) = A( I-1, I ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call DSYTRD and DORGTR to compute S and U from +* upper triangle. +* + CALL DLACPY( 'U', N, N, A, LDA, V, LDU ) +* + NTEST = 1 + CALL DSYTRD( 'U', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL DLACPY( 'U', N, N, V, LDU, U, LDU ) +* + NTEST = 2 + CALL DORGTR( 'U', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DORGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 2 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL DSYT21( 2, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 1 ) ) + CALL DSYT21( 3, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 2 ) ) +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the standard 1-stage algorithm and use it as a +* reference to compare with the 2-stage technique +* +* Compute D1 from the 1-stage and used as reference for the +* 2-stage +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D1, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL DLACPY( "U", N, N, A, LDA, V, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL DSYTRD_2STAGE( 'N', "U", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL DLACPY( "L", N, N, A, LDA, V, LDU ) + CALL DSYTRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL DSTEQR( 'N', N, D3, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 4 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT( 3 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 4 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Skip the DSYTRD for lower that since we replaced its testing +* 3 and 4 by the 2-stage one. + GOTO 101 +* +* Call DSYTRD and DORGTR to compute S and U from +* lower triangle, do tests. +* + CALL DLACPY( 'L', N, N, A, LDA, V, LDU ) +* + NTEST = 3 + CALL DSYTRD( 'L', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL DLACPY( 'L', N, N, V, LDU, U, LDU ) +* + NTEST = 4 + CALL DORGTR( 'L', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DORGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL DSYT21( 2, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 3 ) ) + CALL DSYT21( 3, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 4 ) ) +* +*after skipping old tests 3 4 back to the normal +* + 101 CONTINUE +* +* Store the upper triangle of A in AP +* + I = 0 + DO 120 JC = 1, N + DO 110 JR = 1, JC + I = I + 1 + AP( I ) = A( JR, JC ) + 110 CONTINUE + 120 CONTINUE +* +* Call DSPTRD and DOPGTR to compute S and U from AP +* + CALL DCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 5 + CALL DSPTRD( 'U', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 6 + CALL DOPGTR( 'U', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DOPGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 5 and 6 +* + CALL DSPT21( 2, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 5 ) ) + CALL DSPT21( 3, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 6 ) ) +* +* Store the lower triangle of A in AP +* + I = 0 + DO 140 JC = 1, N + DO 130 JR = JC, N + I = I + 1 + AP( I ) = A( JR, JC ) + 130 CONTINUE + 140 CONTINUE +* +* Call DSPTRD and DOPGTR to compute S and U from AP +* + CALL DCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 7 + CALL DSPTRD( 'L', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 8 + CALL DOPGTR( 'L', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DOPGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 8 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL DSPT21( 2, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 7 ) ) + CALL DSPT21( 3, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 8 ) ) +* +* Call DSTEQR to compute D1, D2, and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 9 + CALL DSTEQR( 'V', N, D1, WORK, Z, LDU, WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 11 + CALL DSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 11 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D3 (using PWK method) +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 12 + CALL DSTERF( N, D3, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTERF', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 9 and 10 +* + CALL DSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 9 ) ) +* +* Do Tests 11 and 12 +* + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 150 CONTINUE +* + RESULT( 11 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 12 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Do Test 13 -- Sturm Sequence Test of Eigenvalues +* Go up by factors of two until it succeeds +* + NTEST = 13 + TEMP1 = THRESH*( HALF-ULP ) +* + DO 160 J = 0, LOG2UI + CALL DSTECH( N, SD, SE, D1, TEMP1, WORK, IINFO ) + IF( IINFO.EQ.0 ) + $ GO TO 170 + TEMP1 = TEMP1*TWO + 160 CONTINUE +* + 170 CONTINUE + RESULT( 13 ) = TEMP1 +* +* For positive definite matrices ( JTYPE.GT.15 ) call DPTEQR +* and do tests 14, 15, and 16 . +* + IF( JTYPE.GT.15 ) THEN +* +* Compute D4 and Z4 +* + CALL DCOPY( N, SD, 1, D4, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 14 + CALL DPTEQR( 'V', N, D4, WORK, Z, LDU, WORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DPTEQR(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 14 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 14 and 15 +* + CALL DSTT21( N, 0, SD, SE, D4, DUMMA, Z, LDU, WORK, + $ RESULT( 14 ) ) +* +* Compute D5 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 16 + CALL DPTEQR( 'N', N, D5, WORK, Z, LDU, WORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DPTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 16 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 180 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J ) ), ABS( D5( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D4( J )-D5( J ) ) ) + 180 CONTINUE +* + RESULT( 16 ) = TEMP2 / MAX( UNFL, + $ HUN*ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 14 ) = ZERO + RESULT( 15 ) = ZERO + RESULT( 16 ) = ZERO + END IF +* +* Call DSTEBZ with different options and do tests 17-18. +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 ) THEN + NTEST = 17 + ABSTOL = UNFL + UNFL + CALL DSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M, NSPLIT, WR, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A,rel)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 17 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 17 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 190 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 190 CONTINUE +* + RESULT( 17 ) = TEMP1 / TEMP2 + ELSE + RESULT( 17 ) = ZERO + END IF +* +* Now ask for all eigenvalues with high absolute accuracy. +* + NTEST = 18 + ABSTOL = UNFL + UNFL + CALL DSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), WORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 18 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 200 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D3( J ) ), ABS( WA1( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D3( J )-WA1( J ) ) ) + 200 CONTINUE +* + RESULT( 18 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Choose random values for IL and IU, and ask for the +* IL-th through IU-th eigenvalues. +* + NTEST = 19 + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + END IF +* + CALL DSTEBZ( 'I', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M2, NSPLIT, WA2, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Determine the values VL and VU of the IL-th and IU-th +* eigenvalues and ask for all eigenvalues in this range. +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF*( WA1( IL )-WA1( IL-1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF*( WA1( IU+1 )-WA1( IU ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL DSTEBZ( 'V', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M3, NSPLIT, WA3, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.NE.0 ) THEN + RESULT( 19 ) = ULPINV + GO TO 280 + END IF +* +* Do test 19 +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( N ) ), ABS( WA1( 1 ) ) ) + ELSE + TEMP3 = ZERO + END IF +* + RESULT( 19 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* +* Call DSTEIN to compute eigenvectors corresponding to +* eigenvalues in WA1. (First call DSTEBZ again, to make sure +* it returns these eigenvalues in the correct order.) +* + NTEST = 21 + CALL DSTEBZ( 'A', 'B', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), WORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL DSTEIN( N, SD, SE, M, WA1, IWORK( 1 ), IWORK( N+1 ), Z, + $ LDU, WORK, IWORK( 2*N+1 ), IWORK( 3*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEIN', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 20 and 21 +* + CALL DSTT21( N, 0, SD, SE, WA1, DUMMA, Z, LDU, WORK, + $ RESULT( 20 ) ) +* +* Call DSTEDC(I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 22 + CALL DSTEDC( 'I', N, D1, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEDC(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 22 and 23 +* + CALL DSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 22 ) ) +* +* Call DSTEDC(V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 24 + CALL DSTEDC( 'V', N, D1, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEDC(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 24 and 25 +* + CALL DSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 24 ) ) +* +* Call DSTEDC(N) to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 26 + CALL DSTEDC( 'N', N, D2, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEDC(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 26 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 26 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 210 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 210 CONTINUE +* + RESULT( 26 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Only test DSTEMR if IEEE compliant +* + IF( ILAENV( 10, 'DSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 .AND. + $ ILAENV( 11, 'DSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 ) THEN +* +* Call DSTEMR, do test 27 (relative eigenvalue accuracy) +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 .AND. SREL ) THEN + NTEST = 27 + ABSTOL = UNFL + UNFL + CALL DSTEMR( 'V', 'A', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK, LWORK, IWORK( 2*N+1 ), LWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(V,A,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 27 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 27 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 220 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 220 CONTINUE +* + RESULT( 27 ) = TEMP1 / TEMP2 +* + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF +* + IF( SRANGE ) THEN + NTEST = 28 + ABSTOL = UNFL + UNFL + CALL DSTEMR( 'V', 'I', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK, LWORK, IWORK( 2*N+1 ), + $ LWORK-2*N, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(V,I,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 28 ) = ULPINV + GO TO 270 + END IF + END IF +* +* +* Do test 28 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP* + $ ( ONE+EIGHT*HALF**2 ) / ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 230 J = IL, IU + TEMP1 = MAX( TEMP1, ABS( WR( J-IL+1 )-D4( N-J+ + $ 1 ) ) / ( ABSTOL+ABS( WR( J-IL+1 ) ) ) ) + 230 CONTINUE +* + RESULT( 28 ) = TEMP1 / TEMP2 + ELSE + RESULT( 28 ) = ZERO + END IF + ELSE + RESULT( 27 ) = ZERO + RESULT( 28 ) = ZERO + END IF +* +* Call DSTEMR(V,I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + IF( SRANGE ) THEN + NTEST = 29 + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + CALL DSTEMR( 'V', 'I', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(V,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 29 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 29 and 30 +* + CALL DSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RESULT( 29 ) ) +* +* Call DSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 31 + CALL DSTEMR( 'N', 'I', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(N,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 31 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 31 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 240 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 240 CONTINUE +* + RESULT( 31 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* +* Call DSTEMR(V,V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 32 +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = D2( IL ) - MAX( HALF* + $ ( D2( IL )-D2( IL-1 ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VL = D2( 1 ) - MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D2( IU ) + MAX( HALF* + $ ( D2( IU+1 )-D2( IU ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VU = D2( N ) + MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL DSTEMR( 'V', 'V', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(V,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 32 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 32 and 33 +* + CALL DSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RESULT( 32 ) ) +* +* Call DSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 34 + CALL DSTEMR( 'N', 'V', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(N,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 34 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 250 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 250 CONTINUE +* + RESULT( 34 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 29 ) = ZERO + RESULT( 30 ) = ZERO + RESULT( 31 ) = ZERO + RESULT( 32 ) = ZERO + RESULT( 33 ) = ZERO + RESULT( 34 ) = ZERO + END IF +* +* +* Call DSTEMR(V,A) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 35 +* + CALL DSTEMR( 'V', 'A', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 35 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 35 and 36 +* + CALL DSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, M, + $ RESULT( 35 ) ) +* +* Call DSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 37 + CALL DSTEMR( 'N', 'A', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEMR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 37 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 260 CONTINUE +* + RESULT( 37 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + END IF + 270 CONTINUE + 280 CONTINUE + NTESTT = NTESTT + NTEST +* +* End of Loop -- Check for RESULT(j) > THRESH +* +* +* Print out tests which fail. +* + DO 290 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'DST' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Symmetric' + WRITE( NOUNIT, FMT = 9994 ) +* +* Tests performed +* + WRITE( NOUNIT, FMT = 9988 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9990 )N, IOLDSD, JTYPE, JR, + $ RESULT( JR ) + END IF + 290 CONTINUE + 300 CONTINUE + 310 CONTINUE +* +* Summary +* + CALL DLASUM( 'DST', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' DCHKST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, ' -- Real Symmetric eigenvalue problem' ) + 9997 FORMAT( ' Matrix types (see DCHKST2STG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) + 9994 FORMAT( ' 16=Positive definite, evenly spaced eigenvalues', + $ / ' 17=Positive definite, geometrically spaced eigenvlaues', + $ / ' 18=Positive definite, clustered eigenvalues', + $ / ' 19=Positive definite, small evenly spaced eigenvalues', + $ / ' 20=Positive definite, large evenly spaced eigenvalues', + $ / ' 21=Diagonally dominant tridiagonal, geometrically', + $ ' spaced eigenvalues' ) +* + 9990 FORMAT( ' N=', I5, ', seed=', 4( I4, ',' ), ' type ', I2, + $ ', test(', I2, ')=', G10.3 ) +* + 9988 FORMAT( / 'Test performed: see DCHKST2STG for details.', / ) +* End of DCHKST2STG +* + END diff --git a/TESTING/EIG/ddrvsg2stg.f b/TESTING/EIG/ddrvsg2stg.f new file mode 100644 index 00000000..b26b7777 --- /dev/null +++ b/TESTING/EIG/ddrvsg2stg.f @@ -0,0 +1,1364 @@ +*> \brief \b DDRVSG2STG +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, +* BB, AP, BP, WORK, NWORK, IWORK, LIWORK, +* RESULT, INFO ) +* +* IMPLICIT NONE +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDZ, LIWORK, NOUNIT, NSIZES, +* $ NTYPES, NWORK +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION A( LDA, * ), AB( LDA, * ), AP( * ), +* $ B( LDB, * ), BB( LDB, * ), BP( * ), D( * ), +* $ RESULT( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DDRVSG2STG checks the real symmetric generalized eigenproblem +*> drivers. +*> +*> DSYGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem. +*> +*> DSYGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem using a divide and conquer algorithm. +*> +*> DSYGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem. +*> +*> DSPGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage. +*> +*> DSPGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage using a divide and +*> conquer algorithm. +*> +*> DSPGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage. +*> +*> DSBGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem. +*> +*> DSBGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem using a divide and conquer +*> algorithm. +*> +*> DSBGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem. +*> +*> When DDRVSG2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix A of the given type will be +*> generated; a random well-conditioned matrix B is also generated +*> and the pair (A,B) is used to test the drivers. +*> +*> For each pair (A,B), the following tests are performed: +*> +*> (1) DSYGV with ITYPE = 1 and UPLO ='U': +*> +*> | A Z - B Z D | / ( |A| |Z| n ulp ) +*> | D - D2 | / ( |D| ulp ) where D is computed by +*> DSYGV and D2 is computed by +*> DSYGV_2STAGE. This test is +*> only performed for DSYGV +*> +*> (2) as (1) but calling DSPGV +*> (3) as (1) but calling DSBGV +*> (4) as (1) but with UPLO = 'L' +*> (5) as (4) but calling DSPGV +*> (6) as (4) but calling DSBGV +*> +*> (7) DSYGV with ITYPE = 2 and UPLO ='U': +*> +*> | A B Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (8) as (7) but calling DSPGV +*> (9) as (7) but with UPLO = 'L' +*> (10) as (9) but calling DSPGV +*> +*> (11) DSYGV with ITYPE = 3 and UPLO ='U': +*> +*> | B A Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (12) as (11) but calling DSPGV +*> (13) as (11) but with UPLO = 'L' +*> (14) as (13) but calling DSPGV +*> +*> DSYGVD, DSPGVD and DSBGVD performed the same 14 tests. +*> +*> DSYGVX, DSPGVX and DSBGVX performed the above 14 tests with +*> the parameter RANGE = 'A', 'N' and 'I', respectively. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value +*> of each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> This type is used for the matrix A which has half-bandwidth KA. +*> B is generated as a well-conditioned positive definite matrix +*> with half-bandwidth KB (<= KA). +*> Currently, the list of possible types for A is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries +*> 1, ULP, ..., ULP and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold) +*> +*> (16) Same as (8), but with KA = 1 and KB = 1 +*> (17) Same as (8), but with KA = 2 and KB = 1 +*> (18) Same as (8), but with KA = 2 and KB = 2 +*> (19) Same as (8), but with KA = 3 and KB = 1 +*> (20) Same as (8), but with KA = 3 and KB = 2 +*> (21) Same as (8), but with KA = 3 and KB = 3 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> DDRVSG2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, DDRVSG2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to DDRVSG2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A DOUBLE PRECISION array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A and AB. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> B DOUBLE PRECISION array, dimension (LDB , max(NN)) +*> Used to hold the symmetric positive definite matrix for +*> the generailzed problem. +*> On exit, B contains the last matrix actually +*> used. +*> Modified. +*> +*> LDB INTEGER +*> The leading dimension of B and BB. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A. On exit, the eigenvalues in D +*> correspond with the matrix in A. +*> Modified. +*> +*> Z DOUBLE PRECISION array, dimension (LDZ, max(NN)) +*> The matrix of eigenvectors. +*> Modified. +*> +*> LDZ INTEGER +*> The leading dimension of Z. It must be at least 1 and +*> at least max( NN ). +*> Not modified. +*> +*> AB DOUBLE PRECISION array, dimension (LDA, max(NN)) +*> Workspace. +*> Modified. +*> +*> BB DOUBLE PRECISION array, dimension (LDB, max(NN)) +*> Workspace. +*> Modified. +*> +*> AP DOUBLE PRECISION array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> BP DOUBLE PRECISION array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> WORK DOUBLE PRECISION array, dimension (NWORK) +*> Workspace. +*> Modified. +*> +*> NWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 1+5*N+2*N*lg(N)+3*N**2 where N = max( NN(j) ) and +*> lg( N ) = smallest integer k such that 2**k >= N. +*> Not modified. +*> +*> IWORK INTEGER array, dimension (LIWORK) +*> Workspace. +*> Modified. +*> +*> LIWORK INTEGER +*> The number of entries in WORK. This must be at least 6*N. +*> Not modified. +*> +*> RESULT DOUBLE PRECISION array, dimension (70) +*> The values computed by the 70 tests described above. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDZ < 1 or LDZ < NMAX. +*> -21: NWORK too small. +*> -23: LIWORK too small. +*> If DLATMR, SLATMS, DSYGV, DSPGV, DSBGV, SSYGVD, SSPGVD, +*> DSBGVD, DSYGVX, DSPGVX or SSBGVX returns an error code, +*> the absolute value of it is returned. +*> Modified. +*> +*> ---------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests that have been run +*> on this matrix. +*> NTESTT The total number of tests for this call. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by DLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, + $ BB, AP, BP, WORK, NWORK, IWORK, LIWORK, + $ RESULT, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDZ, LIWORK, NOUNIT, NSIZES, + $ NTYPES, NWORK + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION A( LDA, * ), AB( LDA, * ), AP( * ), + $ B( LDB, * ), BB( LDB, * ), BP( * ), D( * ), + $ D2( * ), RESULT( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TEN = 10.0D0 ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IBTYPE, IBUPLO, IINFO, IJ, IL, IMODE, ITEMP, + $ ITYPE, IU, J, JCOL, JSIZE, JTYPE, KA, KA9, KB, + $ KB9, M, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, ULP, ULPINV, UNFL, VL, VU, TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLARND + EXTERNAL LSAME, DLAMCH, DLARND +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLACPY, DLAFTS, DLASET, DLASUM, DLATMR, + $ DLATMS, DSBGV, DSBGVD, DSBGVX, DSGT01, DSPGV, + $ DSPGVD, DSPGVX, DSYGV, DSYGVD, DSYGVX, XERBLA, + $ DSYGV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 6*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 6*1 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 6*4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 0 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LE.1 .OR. LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDZ.LE.1 .OR. LDZ.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( NMAX, 3 )**2.GT.NWORK ) THEN + INFO = -21 + ELSE IF( 2*MAX( NMAX, 3 )**2.GT.LIWORK ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DDRVSG2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = DLAMCH( 'Overflow' ) + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 650 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + KA9 = 0 + KB9 = 0 + DO 640 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 640 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, w/ eigenvalues +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 banded, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 90 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* + IF( ITYPE.EQ.1 ) THEN +* +* Zero +* + KA = 0 + KB = 0 + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + KA = 0 + KB = 0 + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + KA = 0 + KB = 0 + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* symmetric, eigenvalues specified +* + KA = MAX( 0, N-1 ) + KB = KA + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + KA = 0 + KB = 0 + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* symmetric, random eigenvalues +* + KA = MAX( 0, N-1 ) + KB = KA + CALL DLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* symmetric banded, eigenvalues specified +* +* The following values are used for the half-bandwidths: +* +* ka = 1 kb = 1 +* ka = 2 kb = 1 +* ka = 2 kb = 2 +* ka = 3 kb = 1 +* ka = 3 kb = 2 +* ka = 3 kb = 3 +* + KB9 = KB9 + 1 + IF( KB9.GT.KA9 ) THEN + KA9 = KA9 + 1 + KB9 = 1 + END IF + KA = MAX( 0, MIN( N-1, KA9 ) ) + KB = MAX( 0, MIN( N-1, KB9 ) ) + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, KA, KA, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 90 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) Call DSYGV, DSPGV, DSBGV, SSYGVD, SSPGVD, SSBGVD, +* DSYGVX, DSPGVX, and DSBGVX, do tests. +* +* loop over the three generalized problems +* IBTYPE = 1: A*x = (lambda)*B*x +* IBTYPE = 2: A*B*x = (lambda)*x +* IBTYPE = 3: B*A*x = (lambda)*x +* + DO 630 IBTYPE = 1, 3 +* +* loop over the setting UPLO +* + DO 620 IBUPLO = 1, 2 + IF( IBUPLO.EQ.1 ) + $ UPLO = 'U' + IF( IBUPLO.EQ.2 ) + $ UPLO = 'L' +* +* Generate random well-conditioned positive definite +* matrix B, of bandwidth not greater than that of A. +* + CALL DLATMS( N, N, 'U', ISEED, 'P', WORK, 5, TEN, ONE, + $ KB, KB, UPLO, B, LDB, WORK( N+1 ), + $ IINFO ) +* +* Test DSYGV +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL DSYGV( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test DSYGV_2STAGE +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL DSYGV_2STAGE( IBTYPE, 'N', UPLO, N, Z, LDZ, + $ BB, LDB, D2, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYGV_2STAGE(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* +C CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, +C $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Do Tests | D1 - D2 | / ( |D1| ulp ) +* D1 computed using the standard 1-stage reduction as reference +* D2 computed using the 2-stage reduction +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D( J )-D2( J ) ) ) + 151 CONTINUE +* + RESULT( NTEST ) = TEMP2 / + $ MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Test DSYGVD +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL DSYGVD( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test DSYGVX +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL DSYGVX( IBTYPE, 'V', 'A', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* +* since we do not know the exact eigenvalues of this +* eigenpair, we just set VL and VU as constants. +* It is quite possible that there are no eigenvalues +* in this interval. +* + VL = ZERO + VU = ANORM + CALL DSYGVX( IBTYPE, 'V', 'V', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYGVX(V,V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL DLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL DLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL DSYGVX( IBTYPE, 'V', 'I', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYGVX(V,I,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + 100 CONTINUE +* +* Test DSPGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 120 J = 1, N + DO 110 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 110 CONTINUE + 120 CONTINUE + ELSE + IJ = 1 + DO 140 J = 1, N + DO 130 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 130 CONTINUE + 140 CONTINUE + END IF +* + CALL DSPGV( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test DSPGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 160 J = 1, N + DO 150 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 150 CONTINUE + 160 CONTINUE + ELSE + IJ = 1 + DO 180 J = 1, N + DO 170 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 170 CONTINUE + 180 CONTINUE + END IF +* + CALL DSPGVD( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, NWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test DSPGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 200 J = 1, N + DO 190 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 190 CONTINUE + 200 CONTINUE + ELSE + IJ = 1 + DO 220 J = 1, N + DO 210 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 210 CONTINUE + 220 CONTINUE + END IF +* + CALL DSPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 240 J = 1, N + DO 230 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 230 CONTINUE + 240 CONTINUE + ELSE + IJ = 1 + DO 260 J = 1, N + DO 250 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 250 CONTINUE + 260 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL DSPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,V' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 280 J = 1, N + DO 270 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 270 CONTINUE + 280 CONTINUE + ELSE + IJ = 1 + DO 300 J = 1, N + DO 290 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 290 CONTINUE + 300 CONTINUE + END IF +* + CALL DSPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPGVX(V,I' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + 310 CONTINUE +* + IF( IBTYPE.EQ.1 ) THEN +* +* TEST DSBGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 340 J = 1, N + DO 320 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 320 CONTINUE + DO 330 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 330 CONTINUE + 340 CONTINUE + ELSE + DO 370 J = 1, N + DO 350 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 350 CONTINUE + DO 360 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 360 CONTINUE + 370 CONTINUE + END IF +* + CALL DSBGV( 'V', UPLO, N, KA, KB, AB, LDA, BB, LDB, + $ D, Z, LDZ, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBGV(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* TEST DSBGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 400 J = 1, N + DO 380 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 380 CONTINUE + DO 390 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 390 CONTINUE + 400 CONTINUE + ELSE + DO 430 J = 1, N + DO 410 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 410 CONTINUE + DO 420 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 420 CONTINUE + 430 CONTINUE + END IF +* + CALL DSBGVD( 'V', UPLO, N, KA, KB, AB, LDA, BB, + $ LDB, D, Z, LDZ, WORK, NWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBGVD(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test DSBGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 460 J = 1, N + DO 440 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 440 CONTINUE + DO 450 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 450 CONTINUE + 460 CONTINUE + ELSE + DO 490 J = 1, N + DO 470 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 470 CONTINUE + DO 480 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 480 CONTINUE + 490 CONTINUE + END IF +* + CALL DSBGVX( 'V', 'A', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBGVX(V,A' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 520 J = 1, N + DO 500 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 500 CONTINUE + DO 510 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 510 CONTINUE + 520 CONTINUE + ELSE + DO 550 J = 1, N + DO 530 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 530 CONTINUE + DO 540 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 540 CONTINUE + 550 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL DSBGVX( 'V', 'V', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBGVX(V,V' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 580 J = 1, N + DO 560 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 560 CONTINUE + DO 570 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 570 CONTINUE + 580 CONTINUE + ELSE + DO 610 J = 1, N + DO 590 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 590 CONTINUE + DO 600 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 600 CONTINUE + 610 CONTINUE + END IF +* + CALL DSBGVX( 'V', 'I', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBGVX(V,I' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL DSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + END IF +* + 620 CONTINUE + 630 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL DLAFTS( 'DSG', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) + 640 CONTINUE + 650 CONTINUE +* +* Summary +* + CALL DLASUM( 'DSG', NOUNIT, NERRS, NTESTT ) +* + RETURN +* +* End of DDRVSG2STG +* + 9999 FORMAT( ' DDRVSG2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + END diff --git a/TESTING/EIG/ddrvst2stg.f b/TESTING/EIG/ddrvst2stg.f new file mode 100644 index 00000000..75385fda --- /dev/null +++ b/TESTING/EIG/ddrvst2stg.f @@ -0,0 +1,2874 @@ +*> \brief \b DDRVST2STG +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, D1, D2, D3, D4, EVEIGS, WA1, +* WA2, WA3, U, LDU, V, TAU, Z, WORK, LWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, +* $ NTYPES +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION A( LDA, * ), D1( * ), D2( * ), D3( * ), +* $ D4( * ), EVEIGS( * ), RESULT( * ), TAU( * ), +* $ U( LDU, * ), V( LDU, * ), WA1( * ), WA2( * ), +* $ WA3( * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DDRVST2STG checks the symmetric eigenvalue problem drivers. +*> +*> DSTEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix. +*> +*> DSTEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix. +*> +*> DSTEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix +*> using the Relatively Robust Representation where it can. +*> +*> DSYEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix. +*> +*> DSYEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix. +*> +*> DSYEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix +*> using the Relatively Robust Representation where it can. +*> +*> DSPEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage. +*> +*> DSPEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage. +*> +*> DSBEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix. +*> +*> DSBEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix. +*> +*> DSYEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix using +*> a divide and conquer algorithm. +*> +*> DSPEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage, using a divide and conquer algorithm. +*> +*> DSBEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix, +*> using a divide and conquer algorithm. +*> +*> When DDRVST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the appropriate drivers. For each matrix and each +*> driver routine called, the following tests will be performed: +*> +*> (1) | A - Z D Z' | / ( |A| n ulp ) +*> +*> (2) | I - Z Z' | / ( n ulp ) +*> +*> (3) | D1 - D2 | / ( |D1| ulp ) +*> +*> where Z is the matrix of eigenvectors returned when the +*> eigenvector option is given and D1 and D2 are the eigenvalues +*> returned with and without the eigenvector option. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced eigenvalues +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced eigenvalues +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" eigenvalues +*> 1, ULP, ..., ULP and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) A band matrix with half bandwidth randomly chosen between +*> 0 and N-1, with evenly spaced eigenvalues 1, ..., ULP +*> with random signs. +*> (17) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (18) Same as (16), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> DDRVST2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, DDRVST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to DDRVST2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A DOUBLE PRECISION array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D1 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by DSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> Modified. +*> +*> D2 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by DSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> Modified. +*> +*> D3 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by DSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> Modified. +*> +*> D4 DOUBLE PRECISION array, dimension +*> +*> EVEIGS DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues as computed by DSTEV('N', ... ) +*> (I reserve the right to change this to the output of +*> whichever algorithm computes the most accurate eigenvalues). +*> +*> WA1 DOUBLE PRECISION array, dimension +*> +*> WA2 DOUBLE PRECISION array, dimension +*> +*> WA3 DOUBLE PRECISION array, dimension +*> +*> U DOUBLE PRECISION array, dimension (LDU, max(NN)) +*> The orthogonal matrix computed by DSYTRD + DORGTR. +*> Modified. +*> +*> LDU INTEGER +*> The leading dimension of U, Z, and V. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> V DOUBLE PRECISION array, dimension (LDU, max(NN)) +*> The Housholder vectors computed by DSYTRD in reducing A to +*> tridiagonal form. +*> Modified. +*> +*> TAU DOUBLE PRECISION array, dimension (max(NN)) +*> The Householder factors computed by DSYTRD in reducing A +*> to tridiagonal form. +*> Modified. +*> +*> Z DOUBLE PRECISION array, dimension (LDU, max(NN)) +*> The orthogonal matrix of eigenvectors computed by DSTEQR, +*> DPTEQR, and DSTEIN. +*> Modified. +*> +*> WORK DOUBLE PRECISION array, dimension (LWORK) +*> Workspace. +*> Modified. +*> +*> LWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 4 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> Not modified. +*> +*> IWORK INTEGER array, +*> dimension (6 + 6*Nmax + 5 * Nmax * lg Nmax ) +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> Workspace. +*> Modified. +*> +*> RESULT DOUBLE PRECISION array, dimension (105) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDU < 1 or LDU < NMAX. +*> -21: LWORK too small. +*> If DLATMR, DLATMS, DSYTRD, DORGTR, DSTEQR, DSTERF, +*> or DORMTR returns an error code, the +*> absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by DLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> +*> The tests performed are: Routine tested +*> 1= | A - U S U' | / ( |A| n ulp ) DSTEV('V', ... ) +*> 2= | I - U U' | / ( n ulp ) DSTEV('V', ... ) +*> 3= |D(with Z) - D(w/o Z)| / (|D| ulp) DSTEV('N', ... ) +*> 4= | A - U S U' | / ( |A| n ulp ) DSTEVX('V','A', ... ) +*> 5= | I - U U' | / ( n ulp ) DSTEVX('V','A', ... ) +*> 6= |D(with Z) - EVEIGS| / (|D| ulp) DSTEVX('N','A', ... ) +*> 7= | A - U S U' | / ( |A| n ulp ) DSTEVR('V','A', ... ) +*> 8= | I - U U' | / ( n ulp ) DSTEVR('V','A', ... ) +*> 9= |D(with Z) - EVEIGS| / (|D| ulp) DSTEVR('N','A', ... ) +*> 10= | A - U S U' | / ( |A| n ulp ) DSTEVX('V','I', ... ) +*> 11= | I - U U' | / ( n ulp ) DSTEVX('V','I', ... ) +*> 12= |D(with Z) - D(w/o Z)| / (|D| ulp) DSTEVX('N','I', ... ) +*> 13= | A - U S U' | / ( |A| n ulp ) DSTEVX('V','V', ... ) +*> 14= | I - U U' | / ( n ulp ) DSTEVX('V','V', ... ) +*> 15= |D(with Z) - D(w/o Z)| / (|D| ulp) DSTEVX('N','V', ... ) +*> 16= | A - U S U' | / ( |A| n ulp ) DSTEVD('V', ... ) +*> 17= | I - U U' | / ( n ulp ) DSTEVD('V', ... ) +*> 18= |D(with Z) - EVEIGS| / (|D| ulp) DSTEVD('N', ... ) +*> 19= | A - U S U' | / ( |A| n ulp ) DSTEVR('V','I', ... ) +*> 20= | I - U U' | / ( n ulp ) DSTEVR('V','I', ... ) +*> 21= |D(with Z) - D(w/o Z)| / (|D| ulp) DSTEVR('N','I', ... ) +*> 22= | A - U S U' | / ( |A| n ulp ) DSTEVR('V','V', ... ) +*> 23= | I - U U' | / ( n ulp ) DSTEVR('V','V', ... ) +*> 24= |D(with Z) - D(w/o Z)| / (|D| ulp) DSTEVR('N','V', ... ) +*> +*> 25= | A - U S U' | / ( |A| n ulp ) DSYEV('L','V', ... ) +*> 26= | I - U U' | / ( n ulp ) DSYEV('L','V', ... ) +*> 27= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEV_2STAGE('L','N', ... ) +*> 28= | A - U S U' | / ( |A| n ulp ) DSYEVX('L','V','A', ... ) +*> 29= | I - U U' | / ( n ulp ) DSYEVX('L','V','A', ... ) +*> 30= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVX_2STAGE('L','N','A', ... ) +*> 31= | A - U S U' | / ( |A| n ulp ) DSYEVX('L','V','I', ... ) +*> 32= | I - U U' | / ( n ulp ) DSYEVX('L','V','I', ... ) +*> 33= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVX_2STAGE('L','N','I', ... ) +*> 34= | A - U S U' | / ( |A| n ulp ) DSYEVX('L','V','V', ... ) +*> 35= | I - U U' | / ( n ulp ) DSYEVX('L','V','V', ... ) +*> 36= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVX_2STAGE('L','N','V', ... ) +*> 37= | A - U S U' | / ( |A| n ulp ) DSPEV('L','V', ... ) +*> 38= | I - U U' | / ( n ulp ) DSPEV('L','V', ... ) +*> 39= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEV('L','N', ... ) +*> 40= | A - U S U' | / ( |A| n ulp ) DSPEVX('L','V','A', ... ) +*> 41= | I - U U' | / ( n ulp ) DSPEVX('L','V','A', ... ) +*> 42= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVX('L','N','A', ... ) +*> 43= | A - U S U' | / ( |A| n ulp ) DSPEVX('L','V','I', ... ) +*> 44= | I - U U' | / ( n ulp ) DSPEVX('L','V','I', ... ) +*> 45= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVX('L','N','I', ... ) +*> 46= | A - U S U' | / ( |A| n ulp ) DSPEVX('L','V','V', ... ) +*> 47= | I - U U' | / ( n ulp ) DSPEVX('L','V','V', ... ) +*> 48= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVX('L','N','V', ... ) +*> 49= | A - U S U' | / ( |A| n ulp ) DSBEV('L','V', ... ) +*> 50= | I - U U' | / ( n ulp ) DSBEV('L','V', ... ) +*> 51= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEV_2STAGE('L','N', ... ) +*> 52= | A - U S U' | / ( |A| n ulp ) DSBEVX('L','V','A', ... ) +*> 53= | I - U U' | / ( n ulp ) DSBEVX('L','V','A', ... ) +*> 54= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVX_2STAGE('L','N','A', ... ) +*> 55= | A - U S U' | / ( |A| n ulp ) DSBEVX('L','V','I', ... ) +*> 56= | I - U U' | / ( n ulp ) DSBEVX('L','V','I', ... ) +*> 57= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVX_2STAGE('L','N','I', ... ) +*> 58= | A - U S U' | / ( |A| n ulp ) DSBEVX('L','V','V', ... ) +*> 59= | I - U U' | / ( n ulp ) DSBEVX('L','V','V', ... ) +*> 60= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVX_2STAGE('L','N','V', ... ) +*> 61= | A - U S U' | / ( |A| n ulp ) DSYEVD('L','V', ... ) +*> 62= | I - U U' | / ( n ulp ) DSYEVD('L','V', ... ) +*> 63= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVD_2STAGE('L','N', ... ) +*> 64= | A - U S U' | / ( |A| n ulp ) DSPEVD('L','V', ... ) +*> 65= | I - U U' | / ( n ulp ) DSPEVD('L','V', ... ) +*> 66= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVD('L','N', ... ) +*> 67= | A - U S U' | / ( |A| n ulp ) DSBEVD('L','V', ... ) +*> 68= | I - U U' | / ( n ulp ) DSBEVD('L','V', ... ) +*> 69= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVD_2STAGE('L','N', ... ) +*> 70= | A - U S U' | / ( |A| n ulp ) DSYEVR('L','V','A', ... ) +*> 71= | I - U U' | / ( n ulp ) DSYEVR('L','V','A', ... ) +*> 72= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVR_2STAGE('L','N','A', ... ) +*> 73= | A - U S U' | / ( |A| n ulp ) DSYEVR('L','V','I', ... ) +*> 74= | I - U U' | / ( n ulp ) DSYEVR('L','V','I', ... ) +*> 75= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVR_2STAGE('L','N','I', ... ) +*> 76= | A - U S U' | / ( |A| n ulp ) DSYEVR('L','V','V', ... ) +*> 77= | I - U U' | / ( n ulp ) DSYEVR('L','V','V', ... ) +*> 78= |D(with Z) - D(w/o Z)| / (|D| ulp) DSYEVR_2STAGE('L','N','V', ... ) +*> +*> Tests 25 through 78 are repeated (as tests 79 through 132) +*> with UPLO='U' +*> +*> To be added in 1999 +*> +*> 79= | A - U S U' | / ( |A| n ulp ) DSPEVR('L','V','A', ... ) +*> 80= | I - U U' | / ( n ulp ) DSPEVR('L','V','A', ... ) +*> 81= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVR('L','N','A', ... ) +*> 82= | A - U S U' | / ( |A| n ulp ) DSPEVR('L','V','I', ... ) +*> 83= | I - U U' | / ( n ulp ) DSPEVR('L','V','I', ... ) +*> 84= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVR('L','N','I', ... ) +*> 85= | A - U S U' | / ( |A| n ulp ) DSPEVR('L','V','V', ... ) +*> 86= | I - U U' | / ( n ulp ) DSPEVR('L','V','V', ... ) +*> 87= |D(with Z) - D(w/o Z)| / (|D| ulp) DSPEVR('L','N','V', ... ) +*> 88= | A - U S U' | / ( |A| n ulp ) DSBEVR('L','V','A', ... ) +*> 89= | I - U U' | / ( n ulp ) DSBEVR('L','V','A', ... ) +*> 90= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVR('L','N','A', ... ) +*> 91= | A - U S U' | / ( |A| n ulp ) DSBEVR('L','V','I', ... ) +*> 92= | I - U U' | / ( n ulp ) DSBEVR('L','V','I', ... ) +*> 93= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVR('L','N','I', ... ) +*> 94= | A - U S U' | / ( |A| n ulp ) DSBEVR('L','V','V', ... ) +*> 95= | I - U U' | / ( n ulp ) DSBEVR('L','V','V', ... ) +*> 96= |D(with Z) - D(w/o Z)| / (|D| ulp) DSBEVR('L','N','V', ... ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, D1, D2, D3, D4, EVEIGS, WA1, + $ WA2, WA3, U, LDU, V, TAU, Z, WORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, + $ NTYPES + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION A( LDA, * ), D1( * ), D2( * ), D3( * ), + $ D4( * ), EVEIGS( * ), RESULT( * ), TAU( * ), + $ U( LDU, * ), V( LDU, * ), WA1( * ), WA2( * ), + $ WA3( * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ TEN = 10.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 18 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IDIAG, IHBW, IINFO, IL, IMODE, INDX, IROW, + $ ITEMP, ITYPE, IU, IUPLO, J, J1, J2, JCOL, + $ JSIZE, JTYPE, KD, LGN, LIWEDC, LWEDC, M, M2, + $ M3, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, ULP, ULPINV, UNFL, + $ VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ ISEED3( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLARND, DSXT1 + EXTERNAL DLAMCH, DLARND, DSXT1 +* .. +* .. External Subroutines .. + EXTERNAL ALASVM, DLABAD, DLACPY, DLAFTS, DLASET, DLATMR, + $ DLATMS, DSBEV, DSBEVD, DSBEVX, DSPEV, DSPEVD, + $ DSPEVX, DSTEV, DSTEVD, DSTEVR, DSTEVX, DSTT21, + $ DSTT22, DSYEV, DSYEVD, DSYEVR, DSYEVX, DSYT21, + $ DSYEVD_2STAGE, DSYEVR_2STAGE, DSYEVX_2STAGE, + $ DSYEV_2STAGE, DSBEV_2STAGE, DSBEVD_2STAGE, + $ DSBEVX_2STAGE, DSYTRD_2STAGE, DSYTRD_SY2SB, + $ DSYTRD_SB2ST, DSYT22, XERBLA +* .. +* .. Scalars in Common .. + CHARACTER*32 SRNAMT +* .. +* .. Common blocks .. + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 3*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 4, 4 / +* .. +* .. Executable Statements .. +* +* Keep ftrnchek happy +* + VL = ZERO + VU = ZERO +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -21 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DDRVST2STG', -INFO ) + RETURN + END IF +* +* Quick return if nothing to do +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = DLAMCH( 'Overflow' ) + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + ISEED3( I ) = ISEED( I ) + 20 CONTINUE +* + NERRS = 0 + NMATS = 0 +* +* + DO 1740 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 +c LIWEDC = 6 + 6*N + 5*N*LGN + LIWEDC = 3 + 5*N + ELSE + LWEDC = 9 +c LIWEDC = 12 + LIWEDC = 8 + END IF + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 1730 JTYPE = 1, MTYPES +* + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 1730 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 band symmetric, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 110 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + IDUMMA( 1 ) = 1 + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + IDUMMA( 1 ) = 1 + CALL DLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Symmetric banded, eigenvalues specified +* + IHBW = INT( ( N-1 )*DLARND( 1, ISEED3 ) ) + CALL DLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, IHBW, IHBW, 'Z', U, LDU, WORK( N+1 ), + $ IINFO ) +* +* Store as dense matrix for most routines. +* + CALL DLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + DO 100 IDIAG = -IHBW, IHBW + IROW = IHBW - IDIAG + 1 + J1 = MAX( 1, IDIAG+1 ) + J2 = MIN( N, N+IDIAG ) + DO 90 J = J1, J2 + I = J - IDIAG + A( I, J ) = U( IROW, J ) + 90 CONTINUE + 100 CONTINUE + ELSE + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 110 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) If matrix is tridiagonal, call DSTEV and DSTEVX. +* + IF( JTYPE.LE.7 ) THEN + NTEST = 1 + DO 120 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 120 CONTINUE + DO 130 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 130 CONTINUE + SRNAMT = 'DSTEV' + CALL DSTEV( 'V', N, D1, D2, Z, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEV(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + RESULT( 2 ) = ULPINV + RESULT( 3 ) = ULPINV + GO TO 180 + END IF + END IF +* +* Do tests 1 and 2. +* + DO 140 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 140 CONTINUE + DO 150 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 150 CONTINUE + CALL DSTT21( N, 0, D3, D4, D1, D2, Z, LDU, WORK, + $ RESULT( 1 ) ) +* + NTEST = 3 + DO 160 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 160 CONTINUE + SRNAMT = 'DSTEV' + CALL DSTEV( 'N', N, D3, D4, Z, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEV(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 180 + END IF + END IF +* +* Do test 3. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 170 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 170 CONTINUE + RESULT( 3 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 180 CONTINUE +* + NTEST = 4 + DO 190 I = 1, N + EVEIGS( I ) = D3( I ) + D1( I ) = DBLE( A( I, I ) ) + 190 CONTINUE + DO 200 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 200 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'V', 'A', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M, WA1, Z, LDU, WORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + RESULT( 5 ) = ULPINV + RESULT( 6 ) = ULPINV + GO TO 250 + END IF + END IF + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF +* +* Do tests 4 and 5. +* + DO 210 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 210 CONTINUE + DO 220 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 220 CONTINUE + CALL DSTT21( N, 0, D3, D4, WA1, D2, Z, LDU, WORK, + $ RESULT( 4 ) ) +* + NTEST = 6 + DO 230 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 230 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'N', 'A', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 250 + END IF + END IF +* +* Do test 6. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 240 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), + $ ABS( EVEIGS( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-EVEIGS( J ) ) ) + 240 CONTINUE + RESULT( 6 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 250 CONTINUE +* + NTEST = 7 + DO 260 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 260 CONTINUE + DO 270 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 270 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'V', 'A', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + RESULT( 8 ) = ULPINV + GO TO 320 + END IF + END IF + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF +* +* Do tests 7 and 8. +* + DO 280 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 280 CONTINUE + DO 290 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 290 CONTINUE + CALL DSTT21( N, 0, D3, D4, WA1, D2, Z, LDU, WORK, + $ RESULT( 7 ) ) +* + NTEST = 9 + DO 300 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 300 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'N', 'A', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 320 + END IF + END IF +* +* Do test 9. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 310 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), + $ ABS( EVEIGS( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-EVEIGS( J ) ) ) + 310 CONTINUE + RESULT( 9 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 320 CONTINUE +* +* + NTEST = 10 + DO 330 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 330 CONTINUE + DO 340 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 340 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'V', 'I', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(V,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 10 ) = ULPINV + RESULT( 11 ) = ULPINV + RESULT( 12 ) = ULPINV + GO TO 380 + END IF + END IF +* +* Do tests 10 and 11. +* + DO 350 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 350 CONTINUE + DO 360 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 360 CONTINUE + CALL DSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 10 ) ) +* +* + NTEST = 12 + DO 370 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 370 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'N', 'I', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(N,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 380 + END IF + END IF +* +* Do test 12. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 12 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, ULP*TEMP3 ) +* + 380 CONTINUE +* + NTEST = 12 + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF* + $ ( WA1( IL )-WA1( IL-1 ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF* + $ ( WA1( IU+1 )-WA1( IU ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + DO 390 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 390 CONTINUE + DO 400 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 400 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'V', 'V', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(V,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 13 ) = ULPINV + RESULT( 14 ) = ULPINV + RESULT( 15 ) = ULPINV + GO TO 440 + END IF + END IF +* + IF( M2.EQ.0 .AND. N.GT.0 ) THEN + RESULT( 13 ) = ULPINV + RESULT( 14 ) = ULPINV + RESULT( 15 ) = ULPINV + GO TO 440 + END IF +* +* Do tests 13 and 14. +* + DO 410 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 410 CONTINUE + DO 420 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 420 CONTINUE + CALL DSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 13 ) ) +* + NTEST = 15 + DO 430 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 430 CONTINUE + SRNAMT = 'DSTEVX' + CALL DSTEVX( 'N', 'V', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVX(N,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 15 ) = ULPINV + GO TO 440 + END IF + END IF +* +* Do test 15. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 15 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* + 440 CONTINUE +* + NTEST = 16 + DO 450 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 450 CONTINUE + DO 460 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 460 CONTINUE + SRNAMT = 'DSTEVD' + CALL DSTEVD( 'V', N, D1, D2, Z, LDU, WORK, LWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVD(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + RESULT( 17 ) = ULPINV + RESULT( 18 ) = ULPINV + GO TO 510 + END IF + END IF +* +* Do tests 16 and 17. +* + DO 470 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 470 CONTINUE + DO 480 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 480 CONTINUE + CALL DSTT21( N, 0, D3, D4, D1, D2, Z, LDU, WORK, + $ RESULT( 16 ) ) +* + NTEST = 18 + DO 490 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 490 CONTINUE + SRNAMT = 'DSTEVD' + CALL DSTEVD( 'N', N, D3, D4, Z, LDU, WORK, LWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVD(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 510 + END IF + END IF +* +* Do test 18. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 500 J = 1, N + TEMP1 = MAX( TEMP1, ABS( EVEIGS( J ) ), + $ ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( EVEIGS( J )-D3( J ) ) ) + 500 CONTINUE + RESULT( 18 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 510 CONTINUE +* + NTEST = 19 + DO 520 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 520 CONTINUE + DO 530 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 530 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'V', 'I', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(V,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 570 + END IF + END IF +* +* DO tests 19 and 20. +* + DO 540 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 540 CONTINUE + DO 550 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 550 CONTINUE + CALL DSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 19 ) ) +* +* + NTEST = 21 + DO 560 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 560 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'N', 'I', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(N,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 21 ) = ULPINV + GO TO 570 + END IF + END IF +* +* Do test 21. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 21 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, ULP*TEMP3 ) +* + 570 CONTINUE +* + NTEST = 21 + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF* + $ ( WA1( IL )-WA1( IL-1 ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF* + $ ( WA1( IU+1 )-WA1( IU ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + DO 580 I = 1, N + D1( I ) = DBLE( A( I, I ) ) + 580 CONTINUE + DO 590 I = 1, N - 1 + D2( I ) = DBLE( A( I+1, I ) ) + 590 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'V', 'V', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(V,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + RESULT( 23 ) = ULPINV + RESULT( 24 ) = ULPINV + GO TO 630 + END IF + END IF +* + IF( M2.EQ.0 .AND. N.GT.0 ) THEN + RESULT( 22 ) = ULPINV + RESULT( 23 ) = ULPINV + RESULT( 24 ) = ULPINV + GO TO 630 + END IF +* +* Do tests 22 and 23. +* + DO 600 I = 1, N + D3( I ) = DBLE( A( I, I ) ) + 600 CONTINUE + DO 610 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 610 CONTINUE + CALL DSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 22 ) ) +* + NTEST = 24 + DO 620 I = 1, N - 1 + D4( I ) = DBLE( A( I+1, I ) ) + 620 CONTINUE + SRNAMT = 'DSTEVR' + CALL DSTEVR( 'N', 'V', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEVR(N,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 630 + END IF + END IF +* +* Do test 24. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 24 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* + 630 CONTINUE +* +* +* + ELSE +* + DO 640 I = 1, 24 + RESULT( I ) = ZERO + 640 CONTINUE + NTEST = 24 + END IF +* +* Perform remaining tests storing upper or lower triangular +* part of matrix. +* + DO 1720 IUPLO = 0, 1 + IF( IUPLO.EQ.0 ) THEN + UPLO = 'L' + ELSE + UPLO = 'U' + END IF +* +* 4) Call DSYEV and DSYEVX. +* + CALL DLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + SRNAMT = 'DSYEV' + CALL DSYEV( 'V', UPLO, N, A, LDU, D1, WORK, LWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 660 + END IF + END IF +* +* Do tests 25 and 26 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + SRNAMT = 'DSYEV_2STAGE' + CALL DSYEV_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, LWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 660 + END IF + END IF +* +* Do test 27 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 650 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 650 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 660 CONTINUE + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + SRNAMT = 'DSYEVX' + CALL DSYEVX( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 680 + END IF + END IF +* +* Do tests 28 and 29 (or +54) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDU, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + SRNAMT = 'DSYEVX_2STAGE' + CALL DSYEVX_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 680 + END IF + END IF +* +* Do test 30 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 670 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 670 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 680 CONTINUE +* + NTEST = NTEST + 1 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVX' + CALL DSYEVX( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 690 + END IF + END IF +* +* Do tests 31 and 32 (or +54) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVX_2STAGE' + CALL DSYEVX_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 690 + END IF + END IF +* +* Do test 33 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 690 CONTINUE +* + NTEST = NTEST + 1 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVX' + CALL DSYEVX( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 700 + END IF + END IF +* +* Do tests 34 and 35 (or +54) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVX_2STAGE' + CALL DSYEVX_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF +* +* Do test 36 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 700 CONTINUE +* +* 5) Call DSPEV and DSPEVX. +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 720 J = 1, N + DO 710 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 710 CONTINUE + 720 CONTINUE + ELSE + INDX = 1 + DO 740 J = 1, N + DO 730 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 730 CONTINUE + 740 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'DSPEV' + CALL DSPEV( 'V', UPLO, N, WORK, D1, Z, LDU, V, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 800 + END IF + END IF +* +* Do tests 37 and 38 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 760 J = 1, N + DO 750 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 750 CONTINUE + 760 CONTINUE + ELSE + INDX = 1 + DO 780 J = 1, N + DO 770 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 770 CONTINUE + 780 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'DSPEV' + CALL DSPEV( 'N', UPLO, N, WORK, D3, Z, LDU, V, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEV(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 800 + END IF + END IF +* +* Do test 39 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 790 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 790 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array WORK with the upper or lower triangular part +* of the matrix in packed form. +* + 800 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 820 J = 1, N + DO 810 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 810 CONTINUE + 820 CONTINUE + ELSE + INDX = 1 + DO 840 J = 1, N + DO 830 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 830 CONTINUE + 840 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'V', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 900 + END IF + END IF +* +* Do tests 40 and 41 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 860 J = 1, N + DO 850 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 850 CONTINUE + 860 CONTINUE + ELSE + INDX = 1 + DO 880 J = 1, N + DO 870 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 870 CONTINUE + 880 CONTINUE + END IF +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'N', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 900 + END IF + END IF +* +* Do test 42 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 890 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 890 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 900 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 920 J = 1, N + DO 910 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 910 CONTINUE + 920 CONTINUE + ELSE + INDX = 1 + DO 940 J = 1, N + DO 930 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 930 CONTINUE + 940 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'V', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 990 + END IF + END IF +* +* Do tests 43 and 44 (or +54) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 960 J = 1, N + DO 950 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 950 CONTINUE + 960 CONTINUE + ELSE + INDX = 1 + DO 980 J = 1, N + DO 970 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 970 CONTINUE + 980 CONTINUE + END IF +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'N', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 990 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 990 + END IF +* +* Do test 45 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 990 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1010 J = 1, N + DO 1000 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1000 CONTINUE + 1010 CONTINUE + ELSE + INDX = 1 + DO 1030 J = 1, N + DO 1020 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1020 CONTINUE + 1030 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'V', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1080 + END IF + END IF +* +* Do tests 46 and 47 (or +54) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1050 J = 1, N + DO 1040 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1040 CONTINUE + 1050 CONTINUE + ELSE + INDX = 1 + DO 1070 J = 1, N + DO 1060 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1060 CONTINUE + 1070 CONTINUE + END IF +* + SRNAMT = 'DSPEVX' + CALL DSPEVX( 'N', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVX(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1080 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1080 + END IF +* +* Do test 48 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1080 CONTINUE +* +* 6) Call DSBEV and DSBEVX. +* + IF( JTYPE.LE.7 ) THEN + KD = 1 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1100 J = 1, N + DO 1090 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1090 CONTINUE + 1100 CONTINUE + ELSE + DO 1120 J = 1, N + DO 1110 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1110 CONTINUE + 1120 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'DSBEV' + CALL DSBEV( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do tests 49 and 50 (or ... ) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1140 J = 1, N + DO 1130 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1130 CONTINUE + 1140 CONTINUE + ELSE + DO 1160 J = 1, N + DO 1150 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1150 CONTINUE + 1160 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'DSBEV_2STAGE' + CALL DSBEV_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSBEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do test 51 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1170 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1170 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 1180 CONTINUE + IF( IUPLO.EQ.1 ) THEN + DO 1200 J = 1, N + DO 1190 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1190 CONTINUE + 1200 CONTINUE + ELSE + DO 1220 J = 1, N + DO 1210 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1210 CONTINUE + 1220 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'DSBEVX' + CALL DSBEVX( 'V', 'A', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1280 + END IF + END IF +* +* Do tests 52 and 53 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDU, WA2, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1240 J = 1, N + DO 1230 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1230 CONTINUE + 1240 CONTINUE + ELSE + DO 1260 J = 1, N + DO 1250 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1250 CONTINUE + 1260 CONTINUE + END IF +* + SRNAMT = 'DSBEVX_2STAGE' + CALL DSBEVX_2STAGE( 'N', 'A', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSBEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1280 + END IF + END IF +* +* Do test 54 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1270 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), ABS( WA3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-WA3( J ) ) ) + 1270 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1280 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 1300 J = 1, N + DO 1290 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1290 CONTINUE + 1300 CONTINUE + ELSE + DO 1320 J = 1, N + DO 1310 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1310 CONTINUE + 1320 CONTINUE + END IF +* + SRNAMT = 'DSBEVX' + CALL DSBEVX( 'V', 'I', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1370 + END IF + END IF +* +* Do tests 55 and 56 (or +54) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1340 J = 1, N + DO 1330 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1330 CONTINUE + 1340 CONTINUE + ELSE + DO 1360 J = 1, N + DO 1350 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1350 CONTINUE + 1360 CONTINUE + END IF +* + SRNAMT = 'DSBEVX_2STAGE' + CALL DSBEVX_2STAGE( 'N', 'I', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSBEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1370 + END IF + END IF +* +* Do test 57 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1370 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 1390 J = 1, N + DO 1380 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1380 CONTINUE + 1390 CONTINUE + ELSE + DO 1410 J = 1, N + DO 1400 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1400 CONTINUE + 1410 CONTINUE + END IF +* + SRNAMT = 'DSBEVX' + CALL DSBEVX( 'V', 'V', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1460 + END IF + END IF +* +* Do tests 58 and 59 (or +54) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1430 J = 1, N + DO 1420 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1420 CONTINUE + 1430 CONTINUE + ELSE + DO 1450 J = 1, N + DO 1440 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1440 CONTINUE + 1450 CONTINUE + END IF +* + SRNAMT = 'DSBEVX_2STAGE' + CALL DSBEVX_2STAGE( 'N', 'V', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSBEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1460 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1460 + END IF +* +* Do test 60 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1460 CONTINUE +* +* 7) Call DSYEVD +* + CALL DLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + SRNAMT = 'DSYEVD' + CALL DSYEVD( 'V', UPLO, N, A, LDU, D1, WORK, LWEDC, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1480 + END IF + END IF +* +* Do tests 61 and 62 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + SRNAMT = 'DSYEVD_2STAGE' + CALL DSYEVD_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, + $ LWORK, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1480 + END IF + END IF +* +* Do test 63 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1470 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1470 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1480 CONTINUE +* +* 8) Call DSPEVD. +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1500 J = 1, N + DO 1490 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1490 CONTINUE + 1500 CONTINUE + ELSE + INDX = 1 + DO 1520 J = 1, N + DO 1510 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1510 CONTINUE + 1520 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'DSPEVD' + CALL DSPEVD( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDX ), LWEDC-INDX+1, IWORK, LIWEDC, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1580 + END IF + END IF +* +* Do tests 64 and 65 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1540 J = 1, N + DO 1530 I = 1, J +* + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1530 CONTINUE + 1540 CONTINUE + ELSE + INDX = 1 + DO 1560 J = 1, N + DO 1550 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1550 CONTINUE + 1560 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'DSPEVD' + CALL DSPEVD( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDX ), LWEDC-INDX+1, IWORK, LIWEDC, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSPEVD(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1580 + END IF + END IF +* +* Do test 66 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1570 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1570 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + 1580 CONTINUE +* +* 9) Call DSBEVD. +* + IF( JTYPE.LE.7 ) THEN + KD = 1 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1600 J = 1, N + DO 1590 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1590 CONTINUE + 1600 CONTINUE + ELSE + DO 1620 J = 1, N + DO 1610 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1610 CONTINUE + 1620 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'DSBEVD' + CALL DSBEVD( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ LWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSBEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1680 + END IF + END IF +* +* Do tests 67 and 68 (or +54) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1640 J = 1, N + DO 1630 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1630 CONTINUE + 1640 CONTINUE + ELSE + DO 1660 J = 1, N + DO 1650 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1650 CONTINUE + 1660 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'DSBEVD_2STAGE' + CALL DSBEVD_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSBEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1680 + END IF + END IF +* +* Do test 69 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1670 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1670 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1680 CONTINUE +* +* + CALL DLACPY( ' ', N, N, A, LDA, V, LDU ) + NTEST = NTEST + 1 + SRNAMT = 'DSYEVR' + CALL DSYEVR( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVR(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1700 + END IF + END IF +* +* Do tests 70 and 71 (or ... ) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + SRNAMT = 'DSYEVR_2STAGE' + CALL DSYEVR_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVR_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1700 + END IF + END IF +* +* Do test 72 (or ... ) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1690 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 1690 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1700 CONTINUE +* + NTEST = NTEST + 1 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVR' + CALL DSYEVR( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVR(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1710 + END IF + END IF +* +* Do tests 73 and 74 (or +54) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVR_2STAGE' + CALL DSYEVR_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVR_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1710 + END IF + END IF +* +* Do test 75 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 1710 CONTINUE +* + NTEST = NTEST + 1 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVR' + CALL DSYEVR( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSYEVR(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 700 + END IF + END IF +* +* Do tests 76 and 77 (or +54) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL DSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'DSYEVR_2STAGE' + CALL DSYEVR_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'DSYEVR_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF +* +* Do test 78 (or +54) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) +* + 1720 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST +* + CALL DLAFTS( 'DST', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) +* + 1730 CONTINUE + 1740 CONTINUE +* +* Summary +* + CALL ALASVM( 'DST', NOUNIT, NERRS, NTESTT, 0 ) +* + 9999 FORMAT( ' DDRVST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + RETURN +* +* End of DDRVST2STG +* + END diff --git a/TESTING/EIG/derrst.f b/TESTING/EIG/derrst.f index dfb3452e..9f149fe0 100644 --- a/TESTING/EIG/derrst.f +++ b/TESTING/EIG/derrst.f @@ -25,6 +25,10 @@ *> DOPGTR, DOPMTR, DSTEQR, SSTERF, SSTEBZ, SSTEIN, DPTEQR, DSBTRD, *> DSYEV, SSYEVX, SSYEVD, DSBEV, SSBEVX, SSBEVD, *> DSPEV, SSPEVX, SSPEVD, DSTEV, SSTEVX, SSTEVD, and SSTEDC. +*> DSYEVD_2STAGE, DSYEVR_2STAGE, DSYEVX_2STAGE, +*> DSYEV_2STAGE, DSBEV_2STAGE, DSBEVD_2STAGE, +*> DSBEVX_2STAGE, DSYTRD_2STAGE, DSYTRD_SY2SB, +*> DSYTRD_SB2ST *> \endverbatim * * Arguments: @@ -94,7 +98,11 @@ $ DSBEV, DSBEVD, DSBEVX, DSBTRD, DSPEV, DSPEVD, $ DSPEVX, DSPTRD, DSTEBZ, DSTEDC, DSTEIN, DSTEQR, $ DSTERF, DSTEV, DSTEVD, DSTEVR, DSTEVX, DSYEV, - $ DSYEVD, DSYEVR, DSYEVX, DSYTRD + $ DSYEVD, DSYEVR, DSYEVX, DSYTRD, + $ DSYEVD_2STAGE, DSYEVR_2STAGE, DSYEVX_2STAGE, + $ DSYEV_2STAGE, DSBEV_2STAGE, DSBEVD_2STAGE, + $ DSBEVX_2STAGE, DSYTRD_2STAGE, DSYTRD_SY2SB, + $ DSYTRD_SB2ST * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -152,6 +160,103 @@ CALL CHKXER( 'DSYTRD', INFOT, NOUT, LERR, OK ) NT = NT + 4 * +* DSYTRD_2STAGE +* + SRNAMT = 'DSYTRD_2STAGE' + INFOT = 1 + CALL DSYTRD_2STAGE( '/', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSYTRD_2STAGE( 'H', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_2STAGE( 'N', '/', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYTRD_2STAGE( 'N', 'U', -1, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYTRD_2STAGE( 'N', 'U', 2, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYTRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL DSYTRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'DSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 7 +* +* DSYTRD_SY2SB +* + SRNAMT = 'DSYTRD_SY2SB' + INFOT = 1 + CALL DSYTRD_SY2SB( '/', 0, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_SY2SB( 'U', -1, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYTRD_SY2SB( 'U', 0, -1, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYTRD_SY2SB( 'U', 2, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSYTRD_SY2SB( 'U', 0, 2, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYTRD_SY2SB( 'U', 0, 0, A, 1, C, 1, TAU, W, 0, INFO ) + CALL CHKXER( 'DSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* +* DSYTRD_SB2ST +* + SRNAMT = 'DSYTRD_SB2ST' + INFOT = 1 + CALL DSYTRD_SB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_SB2ST( 'Y', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_SB2ST( 'Y', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYTRD_SB2ST( 'Y', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSYTRD_SB2ST( 'Y', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYTRD_SB2ST( 'Y', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSYTRD_SB2ST( 'Y', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSYTRD_SB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DSYTRD_SB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * DORGTR * SRNAMT = 'DORGTR' @@ -536,6 +641,44 @@ CALL CHKXER( 'DSYEVD', INFOT, NOUT, LERR, OK ) NT = NT + 10 * +* DSYEVD_2STAGE +* + SRNAMT = 'DSYEVD_2STAGE' + INFOT = 1 + CALL DSYEVD_2STAGE( '/', 'U', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSYEVD_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYEVD_2STAGE( 'N', '/', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYEVD_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYEVD_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DSYEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 0, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DSYEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 4, IW, 1, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 8 +* CALL DSYEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 20, IW, 12, INFO ) +* CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, IW, 0, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 25, IW, 0, INFO ) + CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 10 +* CALL DSYEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 27, IW, 11, INFO ) +* CALL CHKXER( 'DSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * DSYEVR * SRNAMT = 'DSYEVR' @@ -591,6 +734,74 @@ CALL CHKXER( 'DSYEVR', INFOT, NOUT, LERR, OK ) NT = NT + 11 * +* DSYEVR_2STAGE +* + SRNAMT = 'DSYEVR_2STAGE' + N = 1 + INFOT = 1 + CALL DSYEVR_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSYEVR_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYEVR_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYEVR_2STAGE( 'N', 'A', '/', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSYEVR_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DSYEVR_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DSYEVR_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL DSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYEVR_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL DSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 0, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL DSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 0, IW( 2*N+1 ), 10*N, + $ INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL DSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 0, + $ INFO ) + CALL CHKXER( 'DSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 12 +* * DSYEV * SRNAMT = 'DSYEV ' @@ -611,6 +822,29 @@ CALL CHKXER( 'DSYEV ', INFOT, NOUT, LERR, OK ) NT = NT + 5 * +* DSYEV_2STAGE +* + SRNAMT = 'DSYEV_2STAGE ' + INFOT = 1 + CALL DSYEV_2STAGE( '/', 'U', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSYEV_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYEV_2STAGE( 'N', '/', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYEV_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYEV_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DSYEV_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'DSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* * DSYEVX * SRNAMT = 'DSYEVX' @@ -663,6 +897,75 @@ CALL CHKXER( 'DSYEVX', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* DSYEVX_2STAGE +* + SRNAMT = 'DSYEVX_2STAGE' + INFOT = 1 + CALL DSYEVX_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSYEVX_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYEVX_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 1.0D0, 1, 0, 0.0D0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYEVX_2STAGE( 'N', 'A', '/', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + INFOT = 4 + CALL DSYEVX_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DSYEVX_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 16, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL DSYEVX_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL DSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL DSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYEVX_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, X, Z, 1, W, 16, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL DSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 2, 0.0D0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL DSYEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 0, W, 16, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 17 + CALL DSYEVX_2STAGE( 'N', 'A', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * DSPEVD * SRNAMT = 'DSPEVD' @@ -786,6 +1089,47 @@ CALL CHKXER( 'DSBTRD', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* DSYTRD_SB2ST +* + SRNAMT = 'DSYTRD_SB2ST' + INFOT = 1 + CALL DSYTRD_SB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_SB2ST( 'N', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSYTRD_SB2ST( 'N', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSYTRD_SB2ST( 'N', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSYTRD_SB2ST( 'N', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSYTRD_SB2ST( 'N', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSYTRD_SB2ST( 'N', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSYTRD_SB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DSYTRD_SB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'DSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * DSBEVD * SRNAMT = 'DSBEVD' @@ -829,6 +1173,60 @@ CALL CHKXER( 'DSBEVD', INFOT, NOUT, LERR, OK ) NT = NT + 11 * +* DSBEVD_2STAGE +* + SRNAMT = 'DSBEVD_2STAGE' + INFOT = 1 + CALL DSBEVD_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSBEVD_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSBEVD_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSBEVD_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSBEVD_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DSBEVD_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, W, + $ 4, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL DSBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 1, W, +* $ 25, IW, 12, INFO ) +* CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, W, + $ 0, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSBEVD_2STAGE( 'N', 'U', 2, 0, A, 1, X, Z, 1, W, + $ 3, IW, 1, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 11 +* CALL DSBEVD_2STAGE( 'V', 'U', 2, 0, A, 1, X, Z, 2, W, +* $ 18, IW, 12, INFO ) +* CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DSBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, W, + $ 1, IW, 0, INFO ) + CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 13 +* CALL DSBEVD_2STAGE( 'V', 'U', 2, 0, A, 1, X, Z, 2, W, +* $ 25, IW, 11, INFO ) +* CALL CHKXER( 'DSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* NT = NT + 12 + NT = NT + 9 +* * DSBEV * SRNAMT = 'DSBEV ' @@ -852,6 +1250,35 @@ CALL CHKXER( 'DSBEV ', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* DSBEV_2STAGE +* + SRNAMT = 'DSBEV_2STAGE ' + INFOT = 1 + CALL DSBEV_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSBEV_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSBEV_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSBEV_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSBEV_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL DSBEV_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL DSBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, Z, 0, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSBEV_2STAGE( 'N', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'DSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * DSBEVX * SRNAMT = 'DSBEVX' @@ -866,6 +1293,7 @@ INFOT = 3 CALL DSBEVX( 'N', 'A', '/', 0, 0, A, 1, Q, 1, 0.0D0, 0.0D0, 0, $ 0, 0.0D0, M, X, Z, 1, W, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX', INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSBEVX( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, 0.0D0, 0.0D0, 0, $ 0, 0.0D0, M, X, Z, 1, W, IW, I3, INFO ) @@ -907,6 +1335,72 @@ $ 0, 0.0D0, M, X, Z, 1, W, IW, I3, INFO ) CALL CHKXER( 'DSBEVX', INFOT, NOUT, LERR, OK ) NT = NT + 13 +* +* DSBEVX_2STAGE +* + SRNAMT = 'DSBEVX_2STAGE' + INFOT = 1 + CALL DSBEVX_2STAGE( '/', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL DSBEVX_2STAGE( 'V', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL DSBEVX_2STAGE( 'N', '/', 'U', 0, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL DSBEVX_2STAGE( 'N', 'A', '/', 0, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL DSBEVX_2STAGE( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL DSBEVX_2STAGE( 'N', 'A', 'U', 0, -1, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL DSBEVX_2STAGE( 'N', 'A', 'U', 2, 1, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL DSBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 1, 0.0D0, +* $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 2, W, 0, IW, I3, INFO ) +* CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL DSBEVX_2STAGE( 'N', 'V', 'U', 1, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL DSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL DSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 2, 1, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DSBEVX_2STAGE( 'N', 'I', 'U', 2, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 2, 1, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL DSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 1, 2, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 18 +* CALL DSBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 2, 0.0D0, +* $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) +* CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL DSBEVX_2STAGE( 'N', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0D0, + $ 0.0D0, 0, 0, 0.0D0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'DSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* NT = NT + 15 + NT = NT + 13 END IF * * Print a summary line. diff --git a/TESTING/EIG/ilaenv.f b/TESTING/EIG/ilaenv.f index 90f80077..6fca6fcb 100644 --- a/TESTING/EIG/ilaenv.f +++ b/TESTING/EIG/ilaenv.f @@ -229,6 +229,16 @@ C ILAENV = 0 * WRITE(*,*) 'ISPEC = ',ISPEC,' ILAENV =',ILAENV * ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) * + ELSE IF(( ISPEC.GE.17 ) .AND. (ISPEC.LE.21)) THEN +* +* 17 <= ISPEC <= 21: 2stage eigenvalues SVD routines. +* + IF( ISPEC.EQ.17 ) THEN + ILAENV = IPARMS( 1 ) + ELSE + ILAENV = IPARAM2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) + ENDIF +* ELSE * * Invalid value for ISPEC diff --git a/TESTING/EIG/schkee.f b/TESTING/EIG/schkee.f index 99d717e0..7651c0a3 100644 --- a/TESTING/EIG/schkee.f +++ b/TESTING/EIG/schkee.f @@ -1106,7 +1106,8 @@ $ SDRGEV, SDRGSX, SDRGVX, SDRVBD, SDRVES, SDRVEV, $ SDRVSG, SDRVST, SDRVSX, SDRVVX, SERRBD, $ SERRED, SERRGG, SERRHS, SERRST, ILAVER, XLAENV, - $ SDRGES3, SDRGEV3 + $ SDRGES3, SDRGEV3, + $ SCHKST2STG, SDRVST2STG, SCHKSB2STG, SDRVSG2STG * .. * .. Intrinsic Functions .. INTRINSIC LEN, MIN @@ -1153,7 +1154,8 @@ PATH = LINE( 1: 3 ) NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'SHS' ) SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'SST' ) .OR. - $ LSAMEN( 3, PATH, 'SSG' ) + $ LSAMEN( 3, PATH, 'SSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) + SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'DBD' ) SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'SBD' ) SEV = LSAMEN( 3, PATH, 'SEV' ) SES = LSAMEN( 3, PATH, 'SES' ) @@ -1839,7 +1841,8 @@ $ WRITE( NOUT, FMT = 9980 )'SCHKHS', INFO 270 CONTINUE * - ELSE IF( LSAMEN( 3, C3, 'SST' ) .OR. LSAMEN( 3, C3, 'SEP' ) ) THEN + ELSE IF( LSAMEN( 3, C3, 'SST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. LSAMEN( 3, C3, 'SE2' ) ) THEN * * ---------------------------------- * SEP: Symmetric Eigenvalue Problem @@ -1869,6 +1872,15 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL SCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ D( 1, 6 ), D( 1, 7 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), + $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL SCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), D( 1, 1 ), $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), @@ -1876,16 +1888,26 @@ $ D( 1, 10 ), D( 1, 11 ), A( 1, 3 ), NMAX, $ A( 1, 4 ), A( 1, 5 ), D( 1, 12 ), A( 1, 6 ), $ WORK, LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'SCHKST', INFO END IF IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL SDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), + $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), + $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, + $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL SDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, $ NOUT, A( 1, 1 ), NMAX, D( 1, 3 ), D( 1, 4 ), $ D( 1, 5 ), D( 1, 6 ), D( 1, 8 ), D( 1, 9 ), $ D( 1, 10 ), D( 1, 11), A( 1, 2 ), NMAX, $ A( 1, 3 ), D( 1, 12 ), A( 1, 4 ), WORK, $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'SDRVST', INFO END IF @@ -1918,11 +1940,17 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN - CALL SDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) +* CALL SDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ D( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, IWORK, LIWORK, RESULT, INFO ) + CALL SDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ D( 1, 3 ), D( 1, 3 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'SDRVSG', INFO END IF @@ -2284,9 +2312,13 @@ CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) IF( TSTERR ) $ CALL SERRST( 'SSB', NOUT ) - CALL SCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) +* CALL SCHKSB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), D( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) + CALL SCHKSB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, D( 1, 1 ), + $ D( 1, 2 ), D( 1, 3 ), D( 1, 4 ), D( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'SCHKSB', INFO * diff --git a/TESTING/EIG/schksb2stg.f b/TESTING/EIG/schksb2stg.f new file mode 100644 index 00000000..02163695 --- /dev/null +++ b/TESTING/EIG/schksb2stg.f @@ -0,0 +1,870 @@ +*> \brief \b SCHKSBSTG +* +* @generated from dchksb2stg.f, fortran d -> s, Sun Nov 6 00:12:41 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, +* ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, +* D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, +* $ NWDTHS +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), KK( * ), NN( * ) +* REAL A( LDA, * ), RESULT( * ), SD( * ), SE( * ), +* $ U( LDU, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SCHKSBSTG tests the reduction of a symmetric band matrix to tridiagonal +*> form, used with the symmetric eigenvalue problem. +*> +*> SSBTRD factors a symmetric band matrix A as U S U' , where ' means +*> transpose, S is symmetric tridiagonal, and U is orthogonal. +*> SSBTRD can use either just the lower or just the upper triangle +*> of A; SCHKSBSTG checks both cases. +*> +*> SSYTRD_SB2ST factors a symmetric band matrix A as U S U' , +*> where ' means transpose, S is symmetric tridiagonal, and U is +*> orthogonal. SSYTRD_SB2ST can use either just the lower or just +*> the upper triangle of A; SCHKSBSTG checks both cases. +*> +*> SSTEQR factors S as Z D1 Z'. +*> D1 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of SSBTRD "U" (used as reference for SSYTRD_SB2ST) +*> D2 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of SSYTRD_SB2ST "U". +*> D3 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of SSYTRD_SB2ST "L". +*> +*> When SCHKSBSTG is called, a number of matrix "sizes" ("n's"), a number +*> of bandwidths ("k's"), and a number of matrix "types" are +*> specified. For each size ("n"), each bandwidth ("k") less than or +*> equal to "n", and each type of matrix, one matrix will be generated +*> and used to test the symmetric banded reduction routine. For each +*> matrix, a number of tests will be performed: +*> +*> (1) | A - V S V' | / ( |A| n ulp ) computed by SSBTRD with +*> UPLO='U' +*> +*> (2) | I - UU' | / ( n ulp ) +*> +*> (3) | A - V S V' | / ( |A| n ulp ) computed by SSBTRD with +*> UPLO='L' +*> +*> (4) | I - UU' | / ( n ulp ) +*> +*> (5) | D1 - D2 | / ( |D1| ulp ) where D1 is computed by +*> SSBTRD with UPLO='U' and +*> D2 is computed by +*> SSYTRD_SB2ST with UPLO='U' +*> +*> (6) | D1 - D3 | / ( |D1| ulp ) where D1 is computed by +*> SSBTRD with UPLO='U' and +*> D3 is computed by +*> SSYTRD_SB2ST with UPLO='L' +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> SCHKSBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NWDTHS +*> \verbatim +*> NWDTHS is INTEGER +*> The number of bandwidths to use. If it is zero, +*> SCHKSBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] KK +*> \verbatim +*> KK is INTEGER array, dimension (NWDTHS) +*> An array containing the bandwidths to be used for the band +*> matrices. The values must be at least zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, SCHKSBSTG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to SCHKSBSTG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA, max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least 2 (not 1!) +*> and at least max( KK )+1. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is REAL array, dimension (max(NN)) +*> Used to hold the diagonal of the tridiagonal matrix computed +*> by SSBTRD. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is REAL array, dimension (max(NN)) +*> Used to hold the off-diagonal of the tridiagonal matrix +*> computed by SSBTRD. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU, max(NN)) +*> Used to hold the orthogonal matrix computed by SSBTRD. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> max( LDA+1, max(NN)+1 )*max(NN). +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (4) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup single_eig +* +* ===================================================================== + SUBROUTINE SCHKSB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, + $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, + $ D2, D3, U, LDU, WORK, LWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, + $ NWDTHS + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), KK( * ), NN( * ) + REAL A( LDA, * ), RESULT( * ), SD( * ), SE( * ), + $ D1( * ), D2( * ), D3( * ), + $ U( LDU, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ TEN = 10.0E0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 15 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, BADNNB + INTEGER I, IINFO, IMODE, ITYPE, J, JC, JCOL, JR, JSIZE, + $ JTYPE, JWIDTH, K, KMAX, MTYPES, N, NERRS, + $ NMATS, NMAX, NTEST, NTESTT + REAL ANINV, ANORM, COND, OVFL, RTOVFL, RTUNFL, + $ TEMP1, TEMP2, TEMP3, TEMP4, ULP, ULPINV, UNFL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), KMAGN( MAXTYP ), + $ KMODE( MAXTYP ), KTYPE( MAXTYP ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLASET, SLASUM, SLATMR, SLATMS, SSBT21, + $ SSBTRD, XERBLA, SSBTRD_SB2ST, SSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0 / +* .. +* .. Executable Statements .. +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + BADNNB = .FALSE. + KMAX = 0 + DO 20 J = 1, NSIZES + KMAX = MAX( KMAX, KK( J ) ) + IF( KK( J ).LT.0 ) + $ BADNNB = .TRUE. + 20 CONTINUE + KMAX = MIN( NMAX-1, KMAX ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NWDTHS.LT.0 ) THEN + INFO = -3 + ELSE IF( BADNNB ) THEN + INFO = -4 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.KMAX+1 ) THEN + INFO = -11 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -15 + ELSE IF( ( MAX( LDA, NMAX )+1 )*NMAX.GT.LWORK ) THEN + INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SCHKSBSTG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 .OR. NWDTHS.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 190 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / REAL( MAX( 1, N ) ) +* + DO 180 JWIDTH = 1, NWDTHS + K = KK( JWIDTH ) + IF( K.GT.N ) + $ GO TO 180 + K = MAX( 0, MIN( N-1, K ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 170 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 170 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A". +* Store as "Upper"; later, we will copy to other format. +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( K+1, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'Q', A( K+1, 1 ), LDA, + $ WORK( N+1 ), IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, K, K, 'Q', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'Q', A( K+1, 1 ), LDA, + $ IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, K, K, + $ ZERO, ANORM, 'Q', A, LDA, IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL SLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, K, K, 'Q', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + IF( N.GT.1 ) + $ K = MAX( 1, K ) + CALL SLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, 1, 1, 'Q', A( K, 1 ), LDA, + $ WORK( N+1 ), IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( K, I ) ) / + $ SQRT( ABS( A( K+1, I-1 )*A( K+1, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( K, I ) = HALF*SQRT( ABS( A( K+1, + $ I-1 )*A( K+1, I ) ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call SSBTRD to compute S and U from upper triangle. +* + CALL SLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 1 + CALL SSBTRD( 'V', 'U', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBTRD(U)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL SSBT21( 'Upper', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RESULT( 1 ) ) +* +* Before converting A into lower for SSBTRD, run SSYTRD_SB2ST +* otherwise matrix A will be converted to lower and then need +* to be converted back to upper in order to run the upper case +* ofSSYTRD_SB2ST +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the SSBTRD and used as reference to compare +* with the SSYTRD_SB2ST routine +* +* Compute D1 from the SSBTRD and used as reference for the +* SSYTRD_SB2ST +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D1, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* SSYTRD_SB2ST Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the SSBTRD. +* + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL SSYTRD_SB2ST( 'N', 'N', "U", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the SSYTRD_SB2ST Upper case +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Convert A from Upper-Triangle-Only storage to +* Lower-Triangle-Only storage. +* + DO 120 JC = 1, N + DO 110 JR = 0, MIN( K, N-JC ) + A( JR+1, JC ) = A( K+1-JR, JC+JR ) + 110 CONTINUE + 120 CONTINUE + DO 140 JC = N + 1 - K, N + DO 130 JR = MIN( K, N-JC ) + 1, K + A( JR+1, JC ) = ZERO + 130 CONTINUE + 140 CONTINUE +* +* Call SSBTRD to compute S and U from lower triangle +* + CALL SLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 3 + CALL SSBTRD( 'V', 'L', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBTRD(L)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 150 + END IF + END IF + NTEST = 4 +* +* Do tests 3 and 4 +* + CALL SSBT21( 'Lower', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RESULT( 3 ) ) +* +* SSYTRD_SB2ST Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the SSBTRD. +* + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL SSYTRD_SB2ST( 'N', 'N', "L", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D3, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 150 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 6 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT(5) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT(6) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* End of Loop -- Check for RESULT(j) > THRESH +* + 150 CONTINUE + NTESTT = NTESTT + NTEST +* +* Print out tests which fail. +* + DO 160 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'SSB' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Symmetric' + WRITE( NOUNIT, FMT = 9994 )'orthogonal', '''', + $ 'transpose', ( '''', J = 1, 6 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9993 )N, K, IOLDSD, JTYPE, + $ JR, RESULT( JR ) + END IF + 160 CONTINUE +* + 170 CONTINUE + 180 CONTINUE + 190 CONTINUE +* +* Summary +* + CALL SLASUM( 'SSB', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' SCHKSBSTG: ', A, ' returned INFO=', I6, '.', / 9X, 'N=', + $ I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, + $ ' -- Real Symmetric Banded Tridiagonal Reduction Routines' ) + 9997 FORMAT( ' Matrix types (see SCHKSBSTG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Banded Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) +* + 9994 FORMAT( / ' Tests performed: (S is Tridiag, U is ', A, ',', + $ / 20X, A, ' means ', A, '.', / ' UPLO=''U'':', + $ / ' 1= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 2= | I - U U', A1, ' | / ( n ulp )', / ' UPLO=''L'':', + $ / ' 3= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 4= | I - U U', A1, ' | / ( n ulp )' / ' Eig check:', + $ /' 5= | D1 - D2', '', ' | / ( |D1| ulp ) ', + $ ' 6= | D1 - D3', '', ' | / ( |D1| ulp ) ' ) + 9993 FORMAT( ' N=', I5, ', K=', I4, ', seed=', 4( I4, ',' ), ' type ', + $ I2, ', test(', I2, ')=', G10.3 ) +* +* End of SCHKSBSTG +* + END diff --git a/TESTING/EIG/schkst2stg.f b/TESTING/EIG/schkst2stg.f new file mode 100644 index 00000000..8db1cf73 --- /dev/null +++ b/TESTING/EIG/schkst2stg.f @@ -0,0 +1,2120 @@ +*> \brief \b SCHKST2STG +* +* @generated from dchkst2stg.f, fortran d -> s, Sat Nov 5 22:51:30 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, +* WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, +* LWORK, IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, +* $ NTYPES +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL A( LDA, * ), AP( * ), D1( * ), D2( * ), +* $ D3( * ), D4( * ), D5( * ), RESULT( * ), +* $ SD( * ), SE( * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), VP( * ), WA1( * ), WA2( * ), +* $ WA3( * ), WORK( * ), WR( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SCHKST2STG checks the symmetric eigenvalue problem routines +*> using the 2-stage reduction techniques. Since the generation +*> of Q or the vectors is not available in this release, we only +*> compare the eigenvalue resulting when using the 2-stage to the +*> one considered as reference using the standard 1-stage reduction +*> SSYTRD. For that, we call the standard SSYTRD and compute D1 using +*> SSTEQR, then we call the 2-stage SSYTRD_2STAGE with Upper and Lower +*> and we compute D2 and D3 using SSTEQR and then we replaced tests +*> 3 and 4 by tests 11 and 12. test 1 and 2 remain to verify that +*> the 1-stage results are OK and can be trusted. +*> This testing routine will converge to the SCHKST in the next +*> release when vectors and generation of Q will be implemented. +*> +*> SSYTRD factors A as U S U' , where ' means transpose, +*> S is symmetric tridiagonal, and U is orthogonal. +*> SSYTRD can use either just the lower or just the upper triangle +*> of A; SCHKST2STG checks both cases. +*> U is represented as a product of Householder +*> transformations, whose vectors are stored in the first +*> n-1 columns of V, and whose scale factors are in TAU. +*> +*> SSPTRD does the same as SSYTRD, except that A and V are stored +*> in "packed" format. +*> +*> SORGTR constructs the matrix U from the contents of V and TAU. +*> +*> SOPGTR constructs the matrix U from the contents of VP and TAU. +*> +*> SSTEQR factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal. D2 is the matrix of +*> eigenvalues computed when Z is not computed. +*> +*> SSTERF computes D3, the matrix of eigenvalues, by the +*> PWK method, which does not yield eigenvectors. +*> +*> SPTEQR factors S as Z4 D4 Z4' , for a +*> symmetric positive definite tridiagonal matrix. +*> D5 is the matrix of eigenvalues computed when Z is not +*> computed. +*> +*> SSTEBZ computes selected eigenvalues. WA1, WA2, and +*> WA3 will denote eigenvalues computed to high +*> absolute accuracy, with different range options. +*> WR will denote eigenvalues computed to high relative +*> accuracy. +*> +*> SSTEIN computes Y, the eigenvectors of S, given the +*> eigenvalues. +*> +*> SSTEDC factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). It may also +*> update an input orthogonal matrix, usually the output +*> from SSYTRD/SORGTR or SSPTRD/SOPGTR ('V' option). It may +*> also just compute eigenvalues ('N' option). +*> +*> SSTEMR factors S as Z D1 Z' , where Z is the orthogonal +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). SSTEMR +*> uses the Relatively Robust Representation whenever possible. +*> +*> When SCHKST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the symmetric eigenroutines. For each matrix, a number +*> of tests will be performed: +*> +*> (1) | A - V S V' | / ( |A| n ulp ) SSYTRD( UPLO='U', ... ) +*> +*> (2) | I - UV' | / ( n ulp ) SORGTR( UPLO='U', ... ) +*> +*> (3) | A - V S V' | / ( |A| n ulp ) SSYTRD( UPLO='L', ... ) +*> replaced by | D1 - D2 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D2 is the +*> eigenvalue matrix computed using S_2stage the output of +*> SSYTRD_2STAGE("N", "U",....). D1 and D2 are computed +*> via SSTEQR('N',...) +*> +*> (4) | I - UV' | / ( n ulp ) SORGTR( UPLO='L', ... ) +*> replaced by | D1 - D3 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D3 is the +*> eigenvalue matrix computed using S_2stage the output of +*> SSYTRD_2STAGE("N", "L",....). D1 and D3 are computed +*> via SSTEQR('N',...) +*> +*> (5-8) Same as 1-4, but for SSPTRD and SOPGTR. +*> +*> (9) | S - Z D Z' | / ( |S| n ulp ) SSTEQR('V',...) +*> +*> (10) | I - ZZ' | / ( n ulp ) SSTEQR('V',...) +*> +*> (11) | D1 - D2 | / ( |D1| ulp ) SSTEQR('N',...) +*> +*> (12) | D1 - D3 | / ( |D1| ulp ) SSTERF +*> +*> (13) 0 if the true eigenvalues (computed by sturm count) +*> of S are within THRESH of +*> those in D1. 2*THRESH if they are not. (Tested using +*> SSTECH) +*> +*> For S positive definite, +*> +*> (14) | S - Z4 D4 Z4' | / ( |S| n ulp ) SPTEQR('V',...) +*> +*> (15) | I - Z4 Z4' | / ( n ulp ) SPTEQR('V',...) +*> +*> (16) | D4 - D5 | / ( 100 |D4| ulp ) SPTEQR('N',...) +*> +*> When S is also diagonally dominant by the factor gamma < 1, +*> +*> (17) max | D4(i) - WR(i) | / ( |D4(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> SSTEBZ( 'A', 'E', ...) +*> +*> (18) | WA1 - D3 | / ( |D3| ulp ) SSTEBZ( 'A', 'E', ...) +*> +*> (19) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> SSTEBZ( 'I', 'E', ...) +*> +*> (20) | S - Y WA1 Y' | / ( |S| n ulp ) SSTEBZ, SSTEIN +*> +*> (21) | I - Y Y' | / ( n ulp ) SSTEBZ, SSTEIN +*> +*> (22) | S - Z D Z' | / ( |S| n ulp ) SSTEDC('I') +*> +*> (23) | I - ZZ' | / ( n ulp ) SSTEDC('I') +*> +*> (24) | S - Z D Z' | / ( |S| n ulp ) SSTEDC('V') +*> +*> (25) | I - ZZ' | / ( n ulp ) SSTEDC('V') +*> +*> (26) | D1 - D2 | / ( |D1| ulp ) SSTEDC('V') and +*> SSTEDC('N') +*> +*> Test 27 is disabled at the moment because SSTEMR does not +*> guarantee high relatvie accuracy. +*> +*> (27) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> SSTEMR('V', 'A') +*> +*> (28) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> SSTEMR('V', 'I') +*> +*> Tests 29 through 34 are disable at present because SSTEMR +*> does not handle partial specturm requests. +*> +*> (29) | S - Z D Z' | / ( |S| n ulp ) SSTEMR('V', 'I') +*> +*> (30) | I - ZZ' | / ( n ulp ) SSTEMR('V', 'I') +*> +*> (31) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> SSTEMR('N', 'I') vs. SSTEMR('V', 'I') +*> +*> (32) | S - Z D Z' | / ( |S| n ulp ) SSTEMR('V', 'V') +*> +*> (33) | I - ZZ' | / ( n ulp ) SSTEMR('V', 'V') +*> +*> (34) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> SSTEMR('N', 'V') vs. SSTEMR('V', 'V') +*> +*> (35) | S - Z D Z' | / ( |S| n ulp ) SSTEMR('V', 'A') +*> +*> (36) | I - ZZ' | / ( n ulp ) SSTEMR('V', 'A') +*> +*> (37) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> SSTEMR('N', 'A') vs. SSTEMR('V', 'A') +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) Same as (8), but diagonal elements are all positive. +*> (17) Same as (9), but diagonal elements are all positive. +*> (18) Same as (10), but diagonal elements are all positive. +*> (19) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (20) Same as (16), but multiplied by SQRT( underflow threshold ) +*> (21) A diagonally dominant tridiagonal matrix with geometrically +*> spaced diagonal entries 1, ..., ULP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> SCHKST2STG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, SCHKST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to SCHKST2STG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array of +*> dimension ( LDA , max(NN) ) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is REAL array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix A stored in packed format. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is REAL array of +*> dimension( max(NN) ) +*> The diagonal of the tridiagonal matrix computed by SSYTRD. +*> On exit, SD and SE contain the tridiagonal form of the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is REAL array of +*> dimension( max(NN) ) +*> The off-diagonal of the tridiagonal matrix computed by +*> SSYTRD. On exit, SD and SE contain the tridiagonal form of +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D1 +*> \verbatim +*> D1 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] D2 +*> \verbatim +*> D2 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D3 +*> \verbatim +*> D3 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D4 +*> \verbatim +*> D4 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SPTEQR(V). +*> SPTEQR factors S as Z4 D4 Z4* +*> On exit, the eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D5 +*> \verbatim +*> D5 is REAL array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by SPTEQR(N) +*> when Z is not computed. On exit, the +*> eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] WA1 +*> \verbatim +*> WA1 is REAL array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> \endverbatim +*> +*> \param[out] WA2 +*> \verbatim +*> WA2 is REAL array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> Choose random values for IL and IU, and ask for the +*> IL-th through IU-th eigenvalues. +*> \endverbatim +*> +*> \param[out] WA3 +*> \verbatim +*> WA3 is REAL array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by SSTEBZ. +*> Determine the values VL and VU of the IL-th and IU-th +*> eigenvalues and ask for all eigenvalues in this range. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different options. +*> as computed by SSTEBZ. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array of +*> dimension( LDU, max(NN) ). +*> The orthogonal matrix computed by SSYTRD + SORGTR. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U, Z, and V. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array of +*> dimension( LDU, max(NN) ). +*> The Housholder vectors computed by SSYTRD in reducing A to +*> tridiagonal form. The vectors computed with UPLO='U' are +*> in the upper triangle, and the vectors computed with UPLO='L' +*> are in the lower triangle. (As described in SSYTRD, the +*> sub- and superdiagonal are not set to 1, although the +*> true Householder vector has a 1 in that position. The +*> routines that use V, such as SORGTR, set those entries to +*> 1 before using them, and then restore them later.) +*> \endverbatim +*> +*> \param[out] VP +*> \verbatim +*> VP is REAL array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix V stored in packed format. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array of +*> dimension( max(NN) ) +*> The Householder factors computed by SSYTRD in reducing A +*> to tridiagonal form. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array of +*> dimension( LDU, max(NN) ). +*> The orthogonal matrix of eigenvectors computed by SSTEQR, +*> SPTEQR, and SSTEIN. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array of +*> dimension( LWORK ) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 3 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> Workspace. +*> \endverbatim +*> +*> \param[out] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The number of entries in IWORK. This must be at least +*> 6 + 6*Nmax + 5 * Nmax * lg Nmax +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is REAL array, dimension (26) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -23: LDU < 1 or LDU < NMAX. +*> -29: LWORK too small. +*> If SLATMR, SLATMS, SSYTRD, SORGTR, SSTEQR, SSTERF, +*> or SORMC2 returns an error code, the +*> absolute value of it is returned. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NBLOCK Blocksize as returned by ENVIR. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup single_eig +* +* ===================================================================== + SUBROUTINE SCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, + $ WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, + $ NTYPES + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL A( LDA, * ), AP( * ), D1( * ), D2( * ), + $ D3( * ), D4( * ), D5( * ), RESULT( * ), + $ SD( * ), SE( * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), VP( * ), WA1( * ), WA2( * ), + $ WA3( * ), WORK( * ), WR( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT, TEN, HUN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ EIGHT = 8.0E0, TEN = 10.0E0, HUN = 100.0E0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) + LOGICAL SRANGE + PARAMETER ( SRANGE = .FALSE. ) + LOGICAL SREL + PARAMETER ( SREL = .FALSE. ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, TRYRAC + INTEGER I, IINFO, IL, IMODE, ITEMP, ITYPE, IU, J, JC, + $ JR, JSIZE, JTYPE, LGN, LIWEDC, LOG2UI, LWEDC, + $ M, M2, M3, MTYPES, N, NAP, NBLOCK, NERRS, + $ NMATS, NMAX, NSPLIT, NTEST, NTESTT, LH, LW + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, TEMP4, ULP, + $ ULPINV, UNFL, VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) + REAL DUMMA( 1 ) +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLARND, SSXT1 + EXTERNAL ILAENV, SLAMCH, SLARND, SSXT1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLABAD, SLACPY, SLASET, SLASUM, SLATMR, + $ SLATMS, SOPGTR, SORGTR, SPTEQR, SSPT21, SSPTRD, + $ SSTEBZ, SSTECH, SSTEDC, SSTEMR, SSTEIN, SSTEQR, + $ SSTERF, SSTT21, SSTT22, SSYT21, SSYTRD, XERBLA, + $ SSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, + $ 8, 8, 9, 9, 9, 9, 9, 10 / + DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 1, 1, 2, 3, 1 / + DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 3, 1, 4, 4, 3 / +* .. +* .. Executable Statements .. +* +* Keep ftnchek happy + IDUMMA( 1 ) = 1 +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + TRYRAC = .TRUE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + NBLOCK = ILAENV( 1, 'SSYTRD', 'L', NMAX, -1, -1, -1 ) + NBLOCK = MIN( NMAX, MAX( 1, NBLOCK ) ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -23 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -29 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SCHKST2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + LOG2UI = INT( LOG( ULPINV ) / LOG( TWO ) ) + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE + NERRS = 0 + NMATS = 0 +* + DO 310 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 + LIWEDC = 6 + 6*N + 5*N*LGN + ELSE + LWEDC = 8 + LIWEDC = 12 + END IF + NAP = ( N*( N+1 ) ) / 2 + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 300 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 300 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JC = 1, N + A( JC, JC ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL SLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + CALL SLATMS( N, N, 'S', ISEED, 'P', WORK, IMODE, COND, + $ ANORM, 1, 1, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( I-1, I ) ) / + $ SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( I-1, I ) = HALF*SQRT( ABS( A( I-1, I-1 )*A( I, + $ I ) ) ) + A( I, I-1 ) = A( I-1, I ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call SSYTRD and SORGTR to compute S and U from +* upper triangle. +* + CALL SLACPY( 'U', N, N, A, LDA, V, LDU ) +* + NTEST = 1 + CALL SSYTRD( 'U', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL SLACPY( 'U', N, N, V, LDU, U, LDU ) +* + NTEST = 2 + CALL SORGTR( 'U', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SORGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 2 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL SSYT21( 2, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 1 ) ) + CALL SSYT21( 3, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 2 ) ) +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the standard 1-stage algorithm and use it as a +* reference to compare with the 2-stage technique +* +* Compute D1 from the 1-stage and used as reference for the +* 2-stage +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D1, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLACPY( "U", N, N, A, LDA, V, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL SSYTRD_2STAGE( 'N', "U", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL SLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL SLACPY( "L", N, N, A, LDA, V, LDU ) + CALL SSYTRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + CALL SSTEQR( 'N', N, D3, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 4 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT( 3 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 4 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Skip the SSYTRD for lower that since we replaced its testing +* 3 and 4 by the 2-stage one. + GOTO 101 +* +* Call SSYTRD and SORGTR to compute S and U from +* lower triangle, do tests. +* + CALL SLACPY( 'L', N, N, A, LDA, V, LDU ) +* + NTEST = 3 + CALL SSYTRD( 'L', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL SLACPY( 'L', N, N, V, LDU, U, LDU ) +* + NTEST = 4 + CALL SORGTR( 'L', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SORGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL SSYT21( 2, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 3 ) ) + CALL SSYT21( 3, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RESULT( 4 ) ) +* +*after skipping old tests 3 4 back to the normal +* + 101 CONTINUE +* +* Store the upper triangle of A in AP +* + I = 0 + DO 120 JC = 1, N + DO 110 JR = 1, JC + I = I + 1 + AP( I ) = A( JR, JC ) + 110 CONTINUE + 120 CONTINUE +* +* Call SSPTRD and SOPGTR to compute S and U from AP +* + CALL SCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 5 + CALL SSPTRD( 'U', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 6 + CALL SOPGTR( 'U', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SOPGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 5 and 6 +* + CALL SSPT21( 2, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 5 ) ) + CALL SSPT21( 3, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 6 ) ) +* +* Store the lower triangle of A in AP +* + I = 0 + DO 140 JC = 1, N + DO 130 JR = JC, N + I = I + 1 + AP( I ) = A( JR, JC ) + 130 CONTINUE + 140 CONTINUE +* +* Call SSPTRD and SOPGTR to compute S and U from AP +* + CALL SCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 7 + CALL SSPTRD( 'L', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 8 + CALL SOPGTR( 'L', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SOPGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 8 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL SSPT21( 2, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 7 ) ) + CALL SSPT21( 3, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RESULT( 8 ) ) +* +* Call SSTEQR to compute D1, D2, and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 9 + CALL SSTEQR( 'V', N, D1, WORK, Z, LDU, WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 11 + CALL SSTEQR( 'N', N, D2, WORK, WORK( N+1 ), LDU, + $ WORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 11 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D3 (using PWK method) +* + CALL SCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 12 + CALL SSTERF( N, D3, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTERF', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 9 and 10 +* + CALL SSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 9 ) ) +* +* Do Tests 11 and 12 +* + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 150 CONTINUE +* + RESULT( 11 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 12 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Do Test 13 -- Sturm Sequence Test of Eigenvalues +* Go up by factors of two until it succeeds +* + NTEST = 13 + TEMP1 = THRESH*( HALF-ULP ) +* + DO 160 J = 0, LOG2UI + CALL SSTECH( N, SD, SE, D1, TEMP1, WORK, IINFO ) + IF( IINFO.EQ.0 ) + $ GO TO 170 + TEMP1 = TEMP1*TWO + 160 CONTINUE +* + 170 CONTINUE + RESULT( 13 ) = TEMP1 +* +* For positive definite matrices ( JTYPE.GT.15 ) call SPTEQR +* and do tests 14, 15, and 16 . +* + IF( JTYPE.GT.15 ) THEN +* +* Compute D4 and Z4 +* + CALL SCOPY( N, SD, 1, D4, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 14 + CALL SPTEQR( 'V', N, D4, WORK, Z, LDU, WORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SPTEQR(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 14 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 14 and 15 +* + CALL SSTT21( N, 0, SD, SE, D4, DUMMA, Z, LDU, WORK, + $ RESULT( 14 ) ) +* +* Compute D5 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 16 + CALL SPTEQR( 'N', N, D5, WORK, Z, LDU, WORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SPTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 16 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 180 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J ) ), ABS( D5( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D4( J )-D5( J ) ) ) + 180 CONTINUE +* + RESULT( 16 ) = TEMP2 / MAX( UNFL, + $ HUN*ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 14 ) = ZERO + RESULT( 15 ) = ZERO + RESULT( 16 ) = ZERO + END IF +* +* Call SSTEBZ with different options and do tests 17-18. +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 ) THEN + NTEST = 17 + ABSTOL = UNFL + UNFL + CALL SSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M, NSPLIT, WR, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A,rel)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 17 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 17 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 190 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 190 CONTINUE +* + RESULT( 17 ) = TEMP1 / TEMP2 + ELSE + RESULT( 17 ) = ZERO + END IF +* +* Now ask for all eigenvalues with high absolute accuracy. +* + NTEST = 18 + ABSTOL = UNFL + UNFL + CALL SSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), WORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 18 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 200 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D3( J ) ), ABS( WA1( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D3( J )-WA1( J ) ) ) + 200 CONTINUE +* + RESULT( 18 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Choose random values for IL and IU, and ask for the +* IL-th through IU-th eigenvalues. +* + NTEST = 19 + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + END IF +* + CALL SSTEBZ( 'I', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M2, NSPLIT, WA2, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Determine the values VL and VU of the IL-th and IU-th +* eigenvalues and ask for all eigenvalues in this range. +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF*( WA1( IL )-WA1( IL-1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF*( WA1( IU+1 )-WA1( IU ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL SSTEBZ( 'V', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M3, NSPLIT, WA3, IWORK( 1 ), IWORK( N+1 ), + $ WORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.NE.0 ) THEN + RESULT( 19 ) = ULPINV + GO TO 280 + END IF +* +* Do test 19 +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( N ) ), ABS( WA1( 1 ) ) ) + ELSE + TEMP3 = ZERO + END IF +* + RESULT( 19 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* +* Call SSTEIN to compute eigenvectors corresponding to +* eigenvalues in WA1. (First call SSTEBZ again, to make sure +* it returns these eigenvalues in the correct order.) +* + NTEST = 21 + CALL SSTEBZ( 'A', 'B', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), WORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEBZ(A,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL SSTEIN( N, SD, SE, M, WA1, IWORK( 1 ), IWORK( N+1 ), Z, + $ LDU, WORK, IWORK( 2*N+1 ), IWORK( 3*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEIN', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 20 and 21 +* + CALL SSTT21( N, 0, SD, SE, WA1, DUMMA, Z, LDU, WORK, + $ RESULT( 20 ) ) +* +* Call SSTEDC(I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 22 + CALL SSTEDC( 'I', N, D1, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEDC(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 22 and 23 +* + CALL SSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 22 ) ) +* +* Call SSTEDC(V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 24 + CALL SSTEDC( 'V', N, D1, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEDC(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 24 and 25 +* + CALL SSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ RESULT( 24 ) ) +* +* Call SSTEDC(N) to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 26 + CALL SSTEDC( 'N', N, D2, WORK, Z, LDU, WORK( N+1 ), LWEDC-N, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEDC(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 26 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 26 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 210 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 210 CONTINUE +* + RESULT( 26 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Only test SSTEMR if IEEE compliant +* + IF( ILAENV( 10, 'SSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 .AND. + $ ILAENV( 11, 'SSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 ) THEN +* +* Call SSTEMR, do test 27 (relative eigenvalue accuracy) +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 .AND. SREL ) THEN + NTEST = 27 + ABSTOL = UNFL + UNFL + CALL SSTEMR( 'V', 'A', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK, LWORK, IWORK( 2*N+1 ), LWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(V,A,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 27 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 27 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 220 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 220 CONTINUE +* + RESULT( 27 ) = TEMP1 / TEMP2 +* + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF +* + IF( SRANGE ) THEN + NTEST = 28 + ABSTOL = UNFL + UNFL + CALL SSTEMR( 'V', 'I', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK, LWORK, IWORK( 2*N+1 ), + $ LWORK-2*N, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(V,I,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 28 ) = ULPINV + GO TO 270 + END IF + END IF +* +* +* Do test 28 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP* + $ ( ONE+EIGHT*HALF**2 ) / ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 230 J = IL, IU + TEMP1 = MAX( TEMP1, ABS( WR( J-IL+1 )-D4( N-J+ + $ 1 ) ) / ( ABSTOL+ABS( WR( J-IL+1 ) ) ) ) + 230 CONTINUE +* + RESULT( 28 ) = TEMP1 / TEMP2 + ELSE + RESULT( 28 ) = ZERO + END IF + ELSE + RESULT( 27 ) = ZERO + RESULT( 28 ) = ZERO + END IF +* +* Call SSTEMR(V,I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + IF( SRANGE ) THEN + NTEST = 29 + IL = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( SLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + CALL SSTEMR( 'V', 'I', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(V,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 29 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 29 and 30 +* + CALL SSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RESULT( 29 ) ) +* +* Call SSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 31 + CALL SSTEMR( 'N', 'I', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(N,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 31 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 31 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 240 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 240 CONTINUE +* + RESULT( 31 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* +* Call SSTEMR(V,V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDU ) +* + NTEST = 32 +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = D2( IL ) - MAX( HALF* + $ ( D2( IL )-D2( IL-1 ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VL = D2( 1 ) - MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D2( IU ) + MAX( HALF* + $ ( D2( IU+1 )-D2( IU ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VU = D2( N ) + MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL SSTEMR( 'V', 'V', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(V,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 32 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 32 and 33 +* + CALL SSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RESULT( 32 ) ) +* +* Call SSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 34 + CALL SSTEMR( 'N', 'V', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(N,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 34 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 250 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 250 CONTINUE +* + RESULT( 34 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 29 ) = ZERO + RESULT( 30 ) = ZERO + RESULT( 31 ) = ZERO + RESULT( 32 ) = ZERO + RESULT( 33 ) = ZERO + RESULT( 34 ) = ZERO + END IF +* +* +* Call SSTEMR(V,A) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 35 +* + CALL SSTEMR( 'V', 'A', N, D5, WORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 35 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 35 and 36 +* + CALL SSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, M, + $ RESULT( 35 ) ) +* +* Call SSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL SCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL SCOPY( N-1, SE, 1, WORK, 1 ) +* + NTEST = 37 + CALL SSTEMR( 'N', 'A', N, D5, WORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ WORK( N+1 ), LWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEMR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 37 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 260 CONTINUE +* + RESULT( 37 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + END IF + 270 CONTINUE + 280 CONTINUE + NTESTT = NTESTT + NTEST +* +* End of Loop -- Check for RESULT(j) > THRESH +* +* +* Print out tests which fail. +* + DO 290 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'SST' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Symmetric' + WRITE( NOUNIT, FMT = 9994 ) +* +* Tests performed +* + WRITE( NOUNIT, FMT = 9988 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9990 )N, IOLDSD, JTYPE, JR, + $ RESULT( JR ) + END IF + 290 CONTINUE + 300 CONTINUE + 310 CONTINUE +* +* Summary +* + CALL SLASUM( 'SST', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' SCHKST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, ' -- Real Symmetric eigenvalue problem' ) + 9997 FORMAT( ' Matrix types (see SCHKST2STG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) + 9994 FORMAT( ' 16=Positive definite, evenly spaced eigenvalues', + $ / ' 17=Positive definite, geometrically spaced eigenvlaues', + $ / ' 18=Positive definite, clustered eigenvalues', + $ / ' 19=Positive definite, small evenly spaced eigenvalues', + $ / ' 20=Positive definite, large evenly spaced eigenvalues', + $ / ' 21=Diagonally dominant tridiagonal, geometrically', + $ ' spaced eigenvalues' ) +* + 9990 FORMAT( ' N=', I5, ', seed=', 4( I4, ',' ), ' type ', I2, + $ ', test(', I2, ')=', G10.3 ) +* + 9988 FORMAT( / 'Test performed: see SCHKST2STG for details.', / ) +* End of SCHKST2STG +* + END diff --git a/TESTING/EIG/sdrvsg2stg.f b/TESTING/EIG/sdrvsg2stg.f new file mode 100644 index 00000000..c39af7fd --- /dev/null +++ b/TESTING/EIG/sdrvsg2stg.f @@ -0,0 +1,1365 @@ +*> \brief \b SDRVSG2STG +* +* @generated from ddrvsg2stg.f, fortran d -> s, Sun Nov 6 13:47:49 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, +* BB, AP, BP, WORK, NWORK, IWORK, LIWORK, +* RESULT, INFO ) +* +* IMPLICIT NONE +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDZ, LIWORK, NOUNIT, NSIZES, +* $ NTYPES, NWORK +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL A( LDA, * ), AB( LDA, * ), AP( * ), +* $ B( LDB, * ), BB( LDB, * ), BP( * ), D( * ), +* $ RESULT( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SDRVSG2STG checks the real symmetric generalized eigenproblem +*> drivers. +*> +*> SSYGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem. +*> +*> SSYGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem using a divide and conquer algorithm. +*> +*> SSYGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem. +*> +*> SSPGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage. +*> +*> SSPGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage using a divide and +*> conquer algorithm. +*> +*> SSPGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite generalized +*> eigenproblem in packed storage. +*> +*> SSBGV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem. +*> +*> SSBGVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem using a divide and conquer +*> algorithm. +*> +*> SSBGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric-definite banded +*> generalized eigenproblem. +*> +*> When SDRVSG2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix A of the given type will be +*> generated; a random well-conditioned matrix B is also generated +*> and the pair (A,B) is used to test the drivers. +*> +*> For each pair (A,B), the following tests are performed: +*> +*> (1) SSYGV with ITYPE = 1 and UPLO ='U': +*> +*> | A Z - B Z D | / ( |A| |Z| n ulp ) +*> | D - D2 | / ( |D| ulp ) where D is computed by +*> SSYGV and D2 is computed by +*> SSYGV_2STAGE. This test is +*> only performed for SSYGV +*> +*> (2) as (1) but calling SSPGV +*> (3) as (1) but calling SSBGV +*> (4) as (1) but with UPLO = 'L' +*> (5) as (4) but calling SSPGV +*> (6) as (4) but calling SSBGV +*> +*> (7) SSYGV with ITYPE = 2 and UPLO ='U': +*> +*> | A B Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (8) as (7) but calling SSPGV +*> (9) as (7) but with UPLO = 'L' +*> (10) as (9) but calling SSPGV +*> +*> (11) SSYGV with ITYPE = 3 and UPLO ='U': +*> +*> | B A Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (12) as (11) but calling SSPGV +*> (13) as (11) but with UPLO = 'L' +*> (14) as (13) but calling SSPGV +*> +*> SSYGVD, SSPGVD and SSBGVD performed the same 14 tests. +*> +*> SSYGVX, SSPGVX and SSBGVX performed the above 14 tests with +*> the parameter RANGE = 'A', 'N' and 'I', respectively. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value +*> of each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> This type is used for the matrix A which has half-bandwidth KA. +*> B is generated as a well-conditioned positive definite matrix +*> with half-bandwidth KB (<= KA). +*> Currently, the list of possible types for A is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries +*> 1, ULP, ..., ULP and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold) +*> +*> (16) Same as (8), but with KA = 1 and KB = 1 +*> (17) Same as (8), but with KA = 2 and KB = 1 +*> (18) Same as (8), but with KA = 2 and KB = 2 +*> (19) Same as (8), but with KA = 3 and KB = 1 +*> (20) Same as (8), but with KA = 3 and KB = 2 +*> (21) Same as (8), but with KA = 3 and KB = 3 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> SDRVSG2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, SDRVSG2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to SDRVSG2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. real) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A REAL array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A and AB. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> B REAL array, dimension (LDB , max(NN)) +*> Used to hold the symmetric positive definite matrix for +*> the generailzed problem. +*> On exit, B contains the last matrix actually +*> used. +*> Modified. +*> +*> LDB INTEGER +*> The leading dimension of B and BB. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D REAL array, dimension (max(NN)) +*> The eigenvalues of A. On exit, the eigenvalues in D +*> correspond with the matrix in A. +*> Modified. +*> +*> Z REAL array, dimension (LDZ, max(NN)) +*> The matrix of eigenvectors. +*> Modified. +*> +*> LDZ INTEGER +*> The leading dimension of Z. It must be at least 1 and +*> at least max( NN ). +*> Not modified. +*> +*> AB REAL array, dimension (LDA, max(NN)) +*> Workspace. +*> Modified. +*> +*> BB REAL array, dimension (LDB, max(NN)) +*> Workspace. +*> Modified. +*> +*> AP REAL array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> BP REAL array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> WORK REAL array, dimension (NWORK) +*> Workspace. +*> Modified. +*> +*> NWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 1+5*N+2*N*lg(N)+3*N**2 where N = max( NN(j) ) and +*> lg( N ) = smallest integer k such that 2**k >= N. +*> Not modified. +*> +*> IWORK INTEGER array, dimension (LIWORK) +*> Workspace. +*> Modified. +*> +*> LIWORK INTEGER +*> The number of entries in WORK. This must be at least 6*N. +*> Not modified. +*> +*> RESULT REAL array, dimension (70) +*> The values computed by the 70 tests described above. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDZ < 1 or LDZ < NMAX. +*> -21: NWORK too small. +*> -23: LIWORK too small. +*> If SLATMR, SLATMS, SSYGV, SSPGV, SSBGV, SSYGVD, SSPGVD, +*> SSBGVD, SSYGVX, SSPGVX or SSBGVX returns an error code, +*> the absolute value of it is returned. +*> Modified. +*> +*> ---------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests that have been run +*> on this matrix. +*> NTESTT The total number of tests for this call. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by SLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup real_eig +* +* ===================================================================== + SUBROUTINE SDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, + $ BB, AP, BP, WORK, NWORK, IWORK, LIWORK, + $ RESULT, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDZ, LIWORK, NOUNIT, NSIZES, + $ NTYPES, NWORK + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL A( LDA, * ), AB( LDA, * ), AP( * ), + $ B( LDB, * ), BB( LDB, * ), BP( * ), D( * ), + $ D2( * ), RESULT( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TEN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TEN = 10.0E0 ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IBTYPE, IBUPLO, IINFO, IJ, IL, IMODE, ITEMP, + $ ITYPE, IU, J, JCOL, JSIZE, JTYPE, KA, KA9, KB, + $ KB9, M, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, ULP, ULPINV, UNFL, VL, VU, TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLARND + EXTERNAL LSAME, SLAMCH, SLARND +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLACPY, SLAFTS, SLASET, SLASUM, SLATMR, + $ SLATMS, SSBGV, SSBGVD, SSBGVX, SSGT01, SSPGV, + $ SSPGVD, SSPGVX, SSYGV, SSYGVD, SSYGVX, XERBLA, + $ SSYGV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 6*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 6*1 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 6*4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 0 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LE.1 .OR. LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDZ.LE.1 .OR. LDZ.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( NMAX, 3 )**2.GT.NWORK ) THEN + INFO = -21 + ELSE IF( 2*MAX( NMAX, 3 )**2.GT.LIWORK ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SDRVSG2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = SLAMCH( 'Overflow' ) + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 650 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + KA9 = 0 + KB9 = 0 + DO 640 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 640 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, w/ eigenvalues +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 banded, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 90 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* + IF( ITYPE.EQ.1 ) THEN +* +* Zero +* + KA = 0 + KB = 0 + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + KA = 0 + KB = 0 + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + KA = 0 + KB = 0 + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* symmetric, eigenvalues specified +* + KA = MAX( 0, N-1 ) + KB = KA + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + KA = 0 + KB = 0 + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* symmetric, random eigenvalues +* + KA = MAX( 0, N-1 ) + KB = KA + CALL SLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* symmetric banded, eigenvalues specified +* +* The following values are used for the half-bandwidths: +* +* ka = 1 kb = 1 +* ka = 2 kb = 1 +* ka = 2 kb = 2 +* ka = 3 kb = 1 +* ka = 3 kb = 2 +* ka = 3 kb = 3 +* + KB9 = KB9 + 1 + IF( KB9.GT.KA9 ) THEN + KA9 = KA9 + 1 + KB9 = 1 + END IF + KA = MAX( 0, MIN( N-1, KA9 ) ) + KB = MAX( 0, MIN( N-1, KB9 ) ) + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, KA, KA, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 90 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) Call SSYGV, SSPGV, SSBGV, SSYGVD, SSPGVD, SSBGVD, +* SSYGVX, SSPGVX, and SSBGVX, do tests. +* +* loop over the three generalized problems +* IBTYPE = 1: A*x = (lambda)*B*x +* IBTYPE = 2: A*B*x = (lambda)*x +* IBTYPE = 3: B*A*x = (lambda)*x +* + DO 630 IBTYPE = 1, 3 +* +* loop over the setting UPLO +* + DO 620 IBUPLO = 1, 2 + IF( IBUPLO.EQ.1 ) + $ UPLO = 'U' + IF( IBUPLO.EQ.2 ) + $ UPLO = 'L' +* +* Generate random well-conditioned positive definite +* matrix B, of bandwidth not greater than that of A. +* + CALL SLATMS( N, N, 'U', ISEED, 'P', WORK, 5, TEN, ONE, + $ KB, KB, UPLO, B, LDB, WORK( N+1 ), + $ IINFO ) +* +* Test SSYGV +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL SSYGV( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test SSYGV_2STAGE +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL SSYGV_2STAGE( IBTYPE, 'N', UPLO, N, Z, LDZ, + $ BB, LDB, D2, WORK, NWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYGV_2STAGE(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* +C CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, +C $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* +* Do Tests | D1 - D2 | / ( |D1| ulp ) +* D1 computed using the standard 1-stage reduction as reference +* D2 computed using the 2-stage reduction +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D( J )-D2( J ) ) ) + 151 CONTINUE +* + RESULT( NTEST ) = TEMP2 / + $ MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Test SSYGVD +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL SSYGVD( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test SSYGVX +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL SSYGVX( IBTYPE, 'V', 'A', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* +* since we do not know the exact eigenvalues of this +* eigenpair, we just set VL and VU as constants. +* It is quite possible that there are no eigenvalues +* in this interval. +* + VL = ZERO + VU = ANORM + CALL SSYGVX( IBTYPE, 'V', 'V', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYGVX(V,V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL SLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL SLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL SSYGVX( IBTYPE, 'V', 'I', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, IWORK( N+1 ), IWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYGVX(V,I,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + 100 CONTINUE +* +* Test SSPGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 120 J = 1, N + DO 110 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 110 CONTINUE + 120 CONTINUE + ELSE + IJ = 1 + DO 140 J = 1, N + DO 130 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 130 CONTINUE + 140 CONTINUE + END IF +* + CALL SSPGV( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test SSPGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 160 J = 1, N + DO 150 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 150 CONTINUE + 160 CONTINUE + ELSE + IJ = 1 + DO 180 J = 1, N + DO 170 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 170 CONTINUE + 180 CONTINUE + END IF +* + CALL SSPGVD( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, NWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test SSPGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 200 J = 1, N + DO 190 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 190 CONTINUE + 200 CONTINUE + ELSE + IJ = 1 + DO 220 J = 1, N + DO 210 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 210 CONTINUE + 220 CONTINUE + END IF +* + CALL SSPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 240 J = 1, N + DO 230 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 230 CONTINUE + 240 CONTINUE + ELSE + IJ = 1 + DO 260 J = 1, N + DO 250 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 250 CONTINUE + 260 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL SSPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,V' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 280 J = 1, N + DO 270 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 270 CONTINUE + 280 CONTINUE + ELSE + IJ = 1 + DO 300 J = 1, N + DO 290 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 290 CONTINUE + 300 CONTINUE + END IF +* + CALL SSPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPGVX(V,I' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + 310 CONTINUE +* + IF( IBTYPE.EQ.1 ) THEN +* +* TEST SSBGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 340 J = 1, N + DO 320 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 320 CONTINUE + DO 330 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 330 CONTINUE + 340 CONTINUE + ELSE + DO 370 J = 1, N + DO 350 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 350 CONTINUE + DO 360 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 360 CONTINUE + 370 CONTINUE + END IF +* + CALL SSBGV( 'V', UPLO, N, KA, KB, AB, LDA, BB, LDB, + $ D, Z, LDZ, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBGV(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* TEST SSBGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 400 J = 1, N + DO 380 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 380 CONTINUE + DO 390 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 390 CONTINUE + 400 CONTINUE + ELSE + DO 430 J = 1, N + DO 410 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 410 CONTINUE + DO 420 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 420 CONTINUE + 430 CONTINUE + END IF +* + CALL SSBGVD( 'V', UPLO, N, KA, KB, AB, LDA, BB, + $ LDB, D, Z, LDZ, WORK, NWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBGVD(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* Test SSBGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 460 J = 1, N + DO 440 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 440 CONTINUE + DO 450 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 450 CONTINUE + 460 CONTINUE + ELSE + DO 490 J = 1, N + DO 470 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 470 CONTINUE + DO 480 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 480 CONTINUE + 490 CONTINUE + END IF +* + CALL SSBGVX( 'V', 'A', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBGVX(V,A' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 520 J = 1, N + DO 500 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 500 CONTINUE + DO 510 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 510 CONTINUE + 520 CONTINUE + ELSE + DO 550 J = 1, N + DO 530 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 530 CONTINUE + DO 540 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 540 CONTINUE + 550 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL SSBGVX( 'V', 'V', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBGVX(V,V' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 580 J = 1, N + DO 560 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 560 CONTINUE + DO 570 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 570 CONTINUE + 580 CONTINUE + ELSE + DO 610 J = 1, N + DO 590 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 590 CONTINUE + DO 600 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 600 CONTINUE + 610 CONTINUE + END IF +* + CALL SSBGVX( 'V', 'I', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBGVX(V,I' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL SSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RESULT( NTEST ) ) +* + END IF +* + 620 CONTINUE + 630 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL SLAFTS( 'SSG', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) + 640 CONTINUE + 650 CONTINUE +* +* Summary +* + CALL SLASUM( 'SSG', NOUNIT, NERRS, NTESTT ) +* + RETURN +* +* End of SDRVSG2STG +* + 9999 FORMAT( ' SDRVSG2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + END diff --git a/TESTING/EIG/sdrvst2stg.f b/TESTING/EIG/sdrvst2stg.f new file mode 100644 index 00000000..727706a8 --- /dev/null +++ b/TESTING/EIG/sdrvst2stg.f @@ -0,0 +1,2874 @@ +*> \brief \b SDRVST2STG +* +* @generated from ddrvst2stg.f, fortran d -> s, Sun Nov 6 00:06:01 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, D1, D2, D3, D4, EVEIGS, WA1, +* WA2, WA3, U, LDU, V, TAU, Z, WORK, LWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, +* $ NTYPES +* REAL THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* REAL A( LDA, * ), D1( * ), D2( * ), D3( * ), +* $ D4( * ), EVEIGS( * ), RESULT( * ), TAU( * ), +* $ U( LDU, * ), V( LDU, * ), WA1( * ), WA2( * ), +* $ WA3( * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SDRVST2STG checks the symmetric eigenvalue problem drivers. +*> +*> SSTEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix. +*> +*> SSTEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix. +*> +*> SSTEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric tridiagonal matrix +*> using the Relatively Robust Representation where it can. +*> +*> SSYEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix. +*> +*> SSYEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix. +*> +*> SSYEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix +*> using the Relatively Robust Representation where it can. +*> +*> SSPEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage. +*> +*> SSPEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage. +*> +*> SSBEV computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix. +*> +*> SSBEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix. +*> +*> SSYEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix using +*> a divide and conquer algorithm. +*> +*> SSPEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric matrix in packed +*> storage, using a divide and conquer algorithm. +*> +*> SSBEVD computes all eigenvalues and, optionally, +*> eigenvectors of a real symmetric band matrix, +*> using a divide and conquer algorithm. +*> +*> When SDRVST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the appropriate drivers. For each matrix and each +*> driver routine called, the following tests will be performed: +*> +*> (1) | A - Z D Z' | / ( |A| n ulp ) +*> +*> (2) | I - Z Z' | / ( n ulp ) +*> +*> (3) | D1 - D2 | / ( |D1| ulp ) +*> +*> where Z is the matrix of eigenvectors returned when the +*> eigenvector option is given and D1 and D2 are the eigenvalues +*> returned with and without the eigenvector option. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced eigenvalues +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced eigenvalues +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" eigenvalues +*> 1, ULP, ..., ULP and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U' D U, where U is orthogonal and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U' D U, where U is orthogonal and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U' D U, where U is orthogonal and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) A band matrix with half bandwidth randomly chosen between +*> 0 and N-1, with evenly spaced eigenvalues 1, ..., ULP +*> with random signs. +*> (17) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (18) Same as (16), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> SDRVST2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, SDRVST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to SDRVST2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH REAL +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A REAL array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D1 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by SSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> Modified. +*> +*> D2 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by SSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> Modified. +*> +*> D3 REAL array, dimension (max(NN)) +*> The eigenvalues of A, as computed by SSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> Modified. +*> +*> D4 REAL array, dimension +*> +*> EVEIGS REAL array, dimension (max(NN)) +*> The eigenvalues as computed by SSTEV('N', ... ) +*> (I reserve the right to change this to the output of +*> whichever algorithm computes the most accurate eigenvalues). +*> +*> WA1 REAL array, dimension +*> +*> WA2 REAL array, dimension +*> +*> WA3 REAL array, dimension +*> +*> U REAL array, dimension (LDU, max(NN)) +*> The orthogonal matrix computed by SSYTRD + SORGTR. +*> Modified. +*> +*> LDU INTEGER +*> The leading dimension of U, Z, and V. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> V REAL array, dimension (LDU, max(NN)) +*> The Housholder vectors computed by SSYTRD in reducing A to +*> tridiagonal form. +*> Modified. +*> +*> TAU REAL array, dimension (max(NN)) +*> The Householder factors computed by SSYTRD in reducing A +*> to tridiagonal form. +*> Modified. +*> +*> Z REAL array, dimension (LDU, max(NN)) +*> The orthogonal matrix of eigenvectors computed by SSTEQR, +*> SPTEQR, and SSTEIN. +*> Modified. +*> +*> WORK REAL array, dimension (LWORK) +*> Workspace. +*> Modified. +*> +*> LWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 4 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> Not modified. +*> +*> IWORK INTEGER array, +*> dimension (6 + 6*Nmax + 5 * Nmax * lg Nmax ) +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> Workspace. +*> Modified. +*> +*> RESULT REAL array, dimension (105) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDU < 1 or LDU < NMAX. +*> -21: LWORK too small. +*> If SLATMR, SLATMS, SSYTRD, SORGTR, SSTEQR, SSTERF, +*> or SORMTR returns an error code, the +*> absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by SLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> +*> The tests performed are: Routine tested +*> 1= | A - U S U' | / ( |A| n ulp ) SSTEV('V', ... ) +*> 2= | I - U U' | / ( n ulp ) SSTEV('V', ... ) +*> 3= |D(with Z) - D(w/o Z)| / (|D| ulp) SSTEV('N', ... ) +*> 4= | A - U S U' | / ( |A| n ulp ) SSTEVX('V','A', ... ) +*> 5= | I - U U' | / ( n ulp ) SSTEVX('V','A', ... ) +*> 6= |D(with Z) - EVEIGS| / (|D| ulp) SSTEVX('N','A', ... ) +*> 7= | A - U S U' | / ( |A| n ulp ) SSTEVR('V','A', ... ) +*> 8= | I - U U' | / ( n ulp ) SSTEVR('V','A', ... ) +*> 9= |D(with Z) - EVEIGS| / (|D| ulp) SSTEVR('N','A', ... ) +*> 10= | A - U S U' | / ( |A| n ulp ) SSTEVX('V','I', ... ) +*> 11= | I - U U' | / ( n ulp ) SSTEVX('V','I', ... ) +*> 12= |D(with Z) - D(w/o Z)| / (|D| ulp) SSTEVX('N','I', ... ) +*> 13= | A - U S U' | / ( |A| n ulp ) SSTEVX('V','V', ... ) +*> 14= | I - U U' | / ( n ulp ) SSTEVX('V','V', ... ) +*> 15= |D(with Z) - D(w/o Z)| / (|D| ulp) SSTEVX('N','V', ... ) +*> 16= | A - U S U' | / ( |A| n ulp ) SSTEVD('V', ... ) +*> 17= | I - U U' | / ( n ulp ) SSTEVD('V', ... ) +*> 18= |D(with Z) - EVEIGS| / (|D| ulp) SSTEVD('N', ... ) +*> 19= | A - U S U' | / ( |A| n ulp ) SSTEVR('V','I', ... ) +*> 20= | I - U U' | / ( n ulp ) SSTEVR('V','I', ... ) +*> 21= |D(with Z) - D(w/o Z)| / (|D| ulp) SSTEVR('N','I', ... ) +*> 22= | A - U S U' | / ( |A| n ulp ) SSTEVR('V','V', ... ) +*> 23= | I - U U' | / ( n ulp ) SSTEVR('V','V', ... ) +*> 24= |D(with Z) - D(w/o Z)| / (|D| ulp) SSTEVR('N','V', ... ) +*> +*> 25= | A - U S U' | / ( |A| n ulp ) SSYEV('L','V', ... ) +*> 26= | I - U U' | / ( n ulp ) SSYEV('L','V', ... ) +*> 27= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEV_2STAGE('L','N', ... ) +*> 28= | A - U S U' | / ( |A| n ulp ) SSYEVX('L','V','A', ... ) +*> 29= | I - U U' | / ( n ulp ) SSYEVX('L','V','A', ... ) +*> 30= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVX_2STAGE('L','N','A', ... ) +*> 31= | A - U S U' | / ( |A| n ulp ) SSYEVX('L','V','I', ... ) +*> 32= | I - U U' | / ( n ulp ) SSYEVX('L','V','I', ... ) +*> 33= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVX_2STAGE('L','N','I', ... ) +*> 34= | A - U S U' | / ( |A| n ulp ) SSYEVX('L','V','V', ... ) +*> 35= | I - U U' | / ( n ulp ) SSYEVX('L','V','V', ... ) +*> 36= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVX_2STAGE('L','N','V', ... ) +*> 37= | A - U S U' | / ( |A| n ulp ) SSPEV('L','V', ... ) +*> 38= | I - U U' | / ( n ulp ) SSPEV('L','V', ... ) +*> 39= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEV('L','N', ... ) +*> 40= | A - U S U' | / ( |A| n ulp ) SSPEVX('L','V','A', ... ) +*> 41= | I - U U' | / ( n ulp ) SSPEVX('L','V','A', ... ) +*> 42= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVX('L','N','A', ... ) +*> 43= | A - U S U' | / ( |A| n ulp ) SSPEVX('L','V','I', ... ) +*> 44= | I - U U' | / ( n ulp ) SSPEVX('L','V','I', ... ) +*> 45= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVX('L','N','I', ... ) +*> 46= | A - U S U' | / ( |A| n ulp ) SSPEVX('L','V','V', ... ) +*> 47= | I - U U' | / ( n ulp ) SSPEVX('L','V','V', ... ) +*> 48= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVX('L','N','V', ... ) +*> 49= | A - U S U' | / ( |A| n ulp ) SSBEV('L','V', ... ) +*> 50= | I - U U' | / ( n ulp ) SSBEV('L','V', ... ) +*> 51= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEV_2STAGE('L','N', ... ) +*> 52= | A - U S U' | / ( |A| n ulp ) SSBEVX('L','V','A', ... ) +*> 53= | I - U U' | / ( n ulp ) SSBEVX('L','V','A', ... ) +*> 54= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVX_2STAGE('L','N','A', ... ) +*> 55= | A - U S U' | / ( |A| n ulp ) SSBEVX('L','V','I', ... ) +*> 56= | I - U U' | / ( n ulp ) SSBEVX('L','V','I', ... ) +*> 57= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVX_2STAGE('L','N','I', ... ) +*> 58= | A - U S U' | / ( |A| n ulp ) SSBEVX('L','V','V', ... ) +*> 59= | I - U U' | / ( n ulp ) SSBEVX('L','V','V', ... ) +*> 60= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVX_2STAGE('L','N','V', ... ) +*> 61= | A - U S U' | / ( |A| n ulp ) SSYEVD('L','V', ... ) +*> 62= | I - U U' | / ( n ulp ) SSYEVD('L','V', ... ) +*> 63= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVD_2STAGE('L','N', ... ) +*> 64= | A - U S U' | / ( |A| n ulp ) SSPEVD('L','V', ... ) +*> 65= | I - U U' | / ( n ulp ) SSPEVD('L','V', ... ) +*> 66= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVD('L','N', ... ) +*> 67= | A - U S U' | / ( |A| n ulp ) SSBEVD('L','V', ... ) +*> 68= | I - U U' | / ( n ulp ) SSBEVD('L','V', ... ) +*> 69= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVD_2STAGE('L','N', ... ) +*> 70= | A - U S U' | / ( |A| n ulp ) SSYEVR('L','V','A', ... ) +*> 71= | I - U U' | / ( n ulp ) SSYEVR('L','V','A', ... ) +*> 72= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVR_2STAGE('L','N','A', ... ) +*> 73= | A - U S U' | / ( |A| n ulp ) SSYEVR('L','V','I', ... ) +*> 74= | I - U U' | / ( n ulp ) SSYEVR('L','V','I', ... ) +*> 75= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVR_2STAGE('L','N','I', ... ) +*> 76= | A - U S U' | / ( |A| n ulp ) SSYEVR('L','V','V', ... ) +*> 77= | I - U U' | / ( n ulp ) SSYEVR('L','V','V', ... ) +*> 78= |D(with Z) - D(w/o Z)| / (|D| ulp) SSYEVR_2STAGE('L','N','V', ... ) +*> +*> Tests 25 through 78 are repeated (as tests 79 through 132) +*> with UPLO='U' +*> +*> To be added in 1999 +*> +*> 79= | A - U S U' | / ( |A| n ulp ) SSPEVR('L','V','A', ... ) +*> 80= | I - U U' | / ( n ulp ) SSPEVR('L','V','A', ... ) +*> 81= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVR('L','N','A', ... ) +*> 82= | A - U S U' | / ( |A| n ulp ) SSPEVR('L','V','I', ... ) +*> 83= | I - U U' | / ( n ulp ) SSPEVR('L','V','I', ... ) +*> 84= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVR('L','N','I', ... ) +*> 85= | A - U S U' | / ( |A| n ulp ) SSPEVR('L','V','V', ... ) +*> 86= | I - U U' | / ( n ulp ) SSPEVR('L','V','V', ... ) +*> 87= |D(with Z) - D(w/o Z)| / (|D| ulp) SSPEVR('L','N','V', ... ) +*> 88= | A - U S U' | / ( |A| n ulp ) SSBEVR('L','V','A', ... ) +*> 89= | I - U U' | / ( n ulp ) SSBEVR('L','V','A', ... ) +*> 90= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVR('L','N','A', ... ) +*> 91= | A - U S U' | / ( |A| n ulp ) SSBEVR('L','V','I', ... ) +*> 92= | I - U U' | / ( n ulp ) SSBEVR('L','V','I', ... ) +*> 93= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVR('L','N','I', ... ) +*> 94= | A - U S U' | / ( |A| n ulp ) SSBEVR('L','V','V', ... ) +*> 95= | I - U U' | / ( n ulp ) SSBEVR('L','V','V', ... ) +*> 96= |D(with Z) - D(w/o Z)| / (|D| ulp) SSBEVR('L','N','V', ... ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup single_eig +* +* ===================================================================== + SUBROUTINE SDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, D1, D2, D3, D4, EVEIGS, WA1, + $ WA2, WA3, U, LDU, V, TAU, Z, WORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LWORK, NOUNIT, NSIZES, + $ NTYPES + REAL THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + REAL A( LDA, * ), D1( * ), D2( * ), D3( * ), + $ D4( * ), EVEIGS( * ), RESULT( * ), TAU( * ), + $ U( LDU, * ), V( LDU, * ), WA1( * ), WA2( * ), + $ WA3( * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ TEN = 10.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E+0 ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 18 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IDIAG, IHBW, IINFO, IL, IMODE, INDX, IROW, + $ ITEMP, ITYPE, IU, IUPLO, J, J1, J2, JCOL, + $ JSIZE, JTYPE, KD, LGN, LIWEDC, LWEDC, M, M2, + $ M3, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + REAL ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, ULP, ULPINV, UNFL, + $ VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ ISEED3( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + REAL SLAMCH, SLARND, SSXT1 + EXTERNAL SLAMCH, SLARND, SSXT1 +* .. +* .. External Subroutines .. + EXTERNAL ALASVM, SLABAD, SLACPY, SLAFTS, SLASET, SLATMR, + $ SLATMS, SSBEV, SSBEVD, SSBEVX, SSPEV, SSPEVD, + $ SSPEVX, SSTEV, SSTEVD, SSTEVR, SSTEVX, SSTT21, + $ SSTT22, SSYEV, SSYEVD, SSYEVR, SSYEVX, SSYT21, + $ SSYEVD_2STAGE, SSYEVR_2STAGE, SSYEVX_2STAGE, + $ SSYEV_2STAGE, SSBEV_2STAGE, SSBEVD_2STAGE, + $ SSBEVX_2STAGE, SSYTRD_2STAGE, SSYTRD_SY2SB, + $ SSYTRD_SB2ST, SSYT22, XERBLA +* .. +* .. Scalars in Common .. + CHARACTER*32 SRNAMT +* .. +* .. Common blocks .. + COMMON / SRNAMC / SRNAMT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 3*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 4, 4 / +* .. +* .. Executable Statements .. +* +* Keep ftrnchek happy +* + VL = ZERO + VU = ZERO +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -21 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SDRVST2STG', -INFO ) + RETURN + END IF +* +* Quick return if nothing to do +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = SLAMCH( 'Overflow' ) + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + ISEED3( I ) = ISEED( I ) + 20 CONTINUE +* + NERRS = 0 + NMATS = 0 +* +* + DO 1740 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 +c LIWEDC = 6 + 6*N + 5*N*LGN + LIWEDC = 3 + 5*N + ELSE + LWEDC = 9 +c LIWEDC = 12 + LIWEDC = 8 + END IF + ANINV = ONE / REAL( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 1730 JTYPE = 1, MTYPES +* + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 1730 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log symmetric, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random symmetric +* =9 band symmetric, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 110 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Symmetric, eigenvalues specified +* + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK( N+1 ), + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + IDUMMA( 1 ) = 1 + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Symmetric, random eigenvalues +* + IDUMMA( 1 ) = 1 + CALL SLATMR( N, N, 'S', ISEED, 'S', WORK, 6, ONE, ONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Symmetric banded, eigenvalues specified +* + IHBW = INT( ( N-1 )*SLARND( 1, ISEED3 ) ) + CALL SLATMS( N, N, 'S', ISEED, 'S', WORK, IMODE, COND, + $ ANORM, IHBW, IHBW, 'Z', U, LDU, WORK( N+1 ), + $ IINFO ) +* +* Store as dense matrix for most routines. +* + CALL SLASET( 'Full', LDA, N, ZERO, ZERO, A, LDA ) + DO 100 IDIAG = -IHBW, IHBW + IROW = IHBW - IDIAG + 1 + J1 = MAX( 1, IDIAG+1 ) + J2 = MIN( N, N+IDIAG ) + DO 90 J = J1, J2 + I = J - IDIAG + A( I, J ) = U( IROW, J ) + 90 CONTINUE + 100 CONTINUE + ELSE + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 110 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*SLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) If matrix is tridiagonal, call SSTEV and SSTEVX. +* + IF( JTYPE.LE.7 ) THEN + NTEST = 1 + DO 120 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 120 CONTINUE + DO 130 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 130 CONTINUE + SRNAMT = 'SSTEV' + CALL SSTEV( 'V', N, D1, D2, Z, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEV(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + RESULT( 2 ) = ULPINV + RESULT( 3 ) = ULPINV + GO TO 180 + END IF + END IF +* +* Do tests 1 and 2. +* + DO 140 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 140 CONTINUE + DO 150 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 150 CONTINUE + CALL SSTT21( N, 0, D3, D4, D1, D2, Z, LDU, WORK, + $ RESULT( 1 ) ) +* + NTEST = 3 + DO 160 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 160 CONTINUE + SRNAMT = 'SSTEV' + CALL SSTEV( 'N', N, D3, D4, Z, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEV(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 180 + END IF + END IF +* +* Do test 3. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 170 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 170 CONTINUE + RESULT( 3 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 180 CONTINUE +* + NTEST = 4 + DO 190 I = 1, N + EVEIGS( I ) = D3( I ) + D1( I ) = REAL( A( I, I ) ) + 190 CONTINUE + DO 200 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 200 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'V', 'A', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M, WA1, Z, LDU, WORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + RESULT( 5 ) = ULPINV + RESULT( 6 ) = ULPINV + GO TO 250 + END IF + END IF + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF +* +* Do tests 4 and 5. +* + DO 210 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 210 CONTINUE + DO 220 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 220 CONTINUE + CALL SSTT21( N, 0, D3, D4, WA1, D2, Z, LDU, WORK, + $ RESULT( 4 ) ) +* + NTEST = 6 + DO 230 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 230 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'N', 'A', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 250 + END IF + END IF +* +* Do test 6. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 240 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), + $ ABS( EVEIGS( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-EVEIGS( J ) ) ) + 240 CONTINUE + RESULT( 6 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 250 CONTINUE +* + NTEST = 7 + DO 260 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 260 CONTINUE + DO 270 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 270 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'V', 'A', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + RESULT( 8 ) = ULPINV + GO TO 320 + END IF + END IF + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF +* +* Do tests 7 and 8. +* + DO 280 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 280 CONTINUE + DO 290 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 290 CONTINUE + CALL SSTT21( N, 0, D3, D4, WA1, D2, Z, LDU, WORK, + $ RESULT( 7 ) ) +* + NTEST = 9 + DO 300 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 300 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'N', 'A', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 320 + END IF + END IF +* +* Do test 9. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 310 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), + $ ABS( EVEIGS( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-EVEIGS( J ) ) ) + 310 CONTINUE + RESULT( 9 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 320 CONTINUE +* +* + NTEST = 10 + DO 330 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 330 CONTINUE + DO 340 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 340 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'V', 'I', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(V,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 10 ) = ULPINV + RESULT( 11 ) = ULPINV + RESULT( 12 ) = ULPINV + GO TO 380 + END IF + END IF +* +* Do tests 10 and 11. +* + DO 350 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 350 CONTINUE + DO 360 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 360 CONTINUE + CALL SSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 10 ) ) +* +* + NTEST = 12 + DO 370 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 370 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'N', 'I', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(N,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 380 + END IF + END IF +* +* Do test 12. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 12 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, ULP*TEMP3 ) +* + 380 CONTINUE +* + NTEST = 12 + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF* + $ ( WA1( IL )-WA1( IL-1 ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF* + $ ( WA1( IU+1 )-WA1( IU ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + DO 390 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 390 CONTINUE + DO 400 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 400 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'V', 'V', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(V,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 13 ) = ULPINV + RESULT( 14 ) = ULPINV + RESULT( 15 ) = ULPINV + GO TO 440 + END IF + END IF +* + IF( M2.EQ.0 .AND. N.GT.0 ) THEN + RESULT( 13 ) = ULPINV + RESULT( 14 ) = ULPINV + RESULT( 15 ) = ULPINV + GO TO 440 + END IF +* +* Do tests 13 and 14. +* + DO 410 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 410 CONTINUE + DO 420 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 420 CONTINUE + CALL SSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 13 ) ) +* + NTEST = 15 + DO 430 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 430 CONTINUE + SRNAMT = 'SSTEVX' + CALL SSTEVX( 'N', 'V', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVX(N,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 15 ) = ULPINV + GO TO 440 + END IF + END IF +* +* Do test 15. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 15 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* + 440 CONTINUE +* + NTEST = 16 + DO 450 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 450 CONTINUE + DO 460 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 460 CONTINUE + SRNAMT = 'SSTEVD' + CALL SSTEVD( 'V', N, D1, D2, Z, LDU, WORK, LWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVD(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + RESULT( 17 ) = ULPINV + RESULT( 18 ) = ULPINV + GO TO 510 + END IF + END IF +* +* Do tests 16 and 17. +* + DO 470 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 470 CONTINUE + DO 480 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 480 CONTINUE + CALL SSTT21( N, 0, D3, D4, D1, D2, Z, LDU, WORK, + $ RESULT( 16 ) ) +* + NTEST = 18 + DO 490 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 490 CONTINUE + SRNAMT = 'SSTEVD' + CALL SSTEVD( 'N', N, D3, D4, Z, LDU, WORK, LWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVD(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 510 + END IF + END IF +* +* Do test 18. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 500 J = 1, N + TEMP1 = MAX( TEMP1, ABS( EVEIGS( J ) ), + $ ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( EVEIGS( J )-D3( J ) ) ) + 500 CONTINUE + RESULT( 18 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 510 CONTINUE +* + NTEST = 19 + DO 520 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 520 CONTINUE + DO 530 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 530 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'V', 'I', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(V,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 570 + END IF + END IF +* +* DO tests 19 and 20. +* + DO 540 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 540 CONTINUE + DO 550 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 550 CONTINUE + CALL SSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 19 ) ) +* +* + NTEST = 21 + DO 560 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 560 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'N', 'I', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(N,I)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 21 ) = ULPINV + GO TO 570 + END IF + END IF +* +* Do test 21. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 21 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, ULP*TEMP3 ) +* + 570 CONTINUE +* + NTEST = 21 + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF* + $ ( WA1( IL )-WA1( IL-1 ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF* + $ ( WA1( IU+1 )-WA1( IU ) ), TEN*ULP*TEMP3, + $ TEN*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + DO 580 I = 1, N + D1( I ) = REAL( A( I, I ) ) + 580 CONTINUE + DO 590 I = 1, N - 1 + D2( I ) = REAL( A( I+1, I ) ) + 590 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'V', 'V', N, D1, D2, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(V,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + RESULT( 23 ) = ULPINV + RESULT( 24 ) = ULPINV + GO TO 630 + END IF + END IF +* + IF( M2.EQ.0 .AND. N.GT.0 ) THEN + RESULT( 22 ) = ULPINV + RESULT( 23 ) = ULPINV + RESULT( 24 ) = ULPINV + GO TO 630 + END IF +* +* Do tests 22 and 23. +* + DO 600 I = 1, N + D3( I ) = REAL( A( I, I ) ) + 600 CONTINUE + DO 610 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 610 CONTINUE + CALL SSTT22( N, M2, 0, D3, D4, WA2, D2, Z, LDU, WORK, + $ MAX( 1, M2 ), RESULT( 22 ) ) +* + NTEST = 24 + DO 620 I = 1, N - 1 + D4( I ) = REAL( A( I+1, I ) ) + 620 CONTINUE + SRNAMT = 'SSTEVR' + CALL SSTEVR( 'N', 'V', N, D3, D4, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSTEVR(N,V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 630 + END IF + END IF +* +* Do test 24. +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( 24 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* + 630 CONTINUE +* +* +* + ELSE +* + DO 640 I = 1, 24 + RESULT( I ) = ZERO + 640 CONTINUE + NTEST = 24 + END IF +* +* Perform remaining tests storing upper or lower triangular +* part of matrix. +* + DO 1720 IUPLO = 0, 1 + IF( IUPLO.EQ.0 ) THEN + UPLO = 'L' + ELSE + UPLO = 'U' + END IF +* +* 4) Call SSYEV and SSYEVX. +* + CALL SLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + SRNAMT = 'SSYEV' + CALL SSYEV( 'V', UPLO, N, A, LDU, D1, WORK, LWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 660 + END IF + END IF +* +* Do tests 25 and 26 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + SRNAMT = 'SSYEV_2STAGE' + CALL SSYEV_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, LWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 660 + END IF + END IF +* +* Do test 27 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 650 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 650 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 660 CONTINUE + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + SRNAMT = 'SSYEVX' + CALL SSYEVX( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 680 + END IF + END IF +* +* Do tests 28 and 29 (or +54) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDU, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + SRNAMT = 'SSYEVX_2STAGE' + CALL SSYEVX_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 680 + END IF + END IF +* +* Do test 30 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 670 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 670 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 680 CONTINUE +* + NTEST = NTEST + 1 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVX' + CALL SSYEVX( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 690 + END IF + END IF +* +* Do tests 31 and 32 (or +54) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVX_2STAGE' + CALL SSYEVX_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 690 + END IF + END IF +* +* Do test 33 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 690 CONTINUE +* + NTEST = NTEST + 1 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVX' + CALL SSYEVX( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 700 + END IF + END IF +* +* Do tests 34 and 35 (or +54) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVX_2STAGE' + CALL SSYEVX_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, WORK, + $ LWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF +* +* Do test 36 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 700 CONTINUE +* +* 5) Call SSPEV and SSPEVX. +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 720 J = 1, N + DO 710 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 710 CONTINUE + 720 CONTINUE + ELSE + INDX = 1 + DO 740 J = 1, N + DO 730 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 730 CONTINUE + 740 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'SSPEV' + CALL SSPEV( 'V', UPLO, N, WORK, D1, Z, LDU, V, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 800 + END IF + END IF +* +* Do tests 37 and 38 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 760 J = 1, N + DO 750 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 750 CONTINUE + 760 CONTINUE + ELSE + INDX = 1 + DO 780 J = 1, N + DO 770 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 770 CONTINUE + 780 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'SSPEV' + CALL SSPEV( 'N', UPLO, N, WORK, D3, Z, LDU, V, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEV(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 800 + END IF + END IF +* +* Do test 39 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 790 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 790 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array WORK with the upper or lower triangular part +* of the matrix in packed form. +* + 800 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 820 J = 1, N + DO 810 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 810 CONTINUE + 820 CONTINUE + ELSE + INDX = 1 + DO 840 J = 1, N + DO 830 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 830 CONTINUE + 840 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'V', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 900 + END IF + END IF +* +* Do tests 40 and 41 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 860 J = 1, N + DO 850 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 850 CONTINUE + 860 CONTINUE + ELSE + INDX = 1 + DO 880 J = 1, N + DO 870 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 870 CONTINUE + 880 CONTINUE + END IF +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'N', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 900 + END IF + END IF +* +* Do test 42 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 890 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 890 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 900 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 920 J = 1, N + DO 910 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 910 CONTINUE + 920 CONTINUE + ELSE + INDX = 1 + DO 940 J = 1, N + DO 930 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 930 CONTINUE + 940 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'V', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 990 + END IF + END IF +* +* Do tests 43 and 44 (or +54) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 960 J = 1, N + DO 950 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 950 CONTINUE + 960 CONTINUE + ELSE + INDX = 1 + DO 980 J = 1, N + DO 970 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 970 CONTINUE + 980 CONTINUE + END IF +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'N', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 990 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 990 + END IF +* +* Do test 45 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 990 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1010 J = 1, N + DO 1000 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1000 CONTINUE + 1010 CONTINUE + ELSE + INDX = 1 + DO 1030 J = 1, N + DO 1020 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1020 CONTINUE + 1030 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'V', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1080 + END IF + END IF +* +* Do tests 46 and 47 (or +54) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1050 J = 1, N + DO 1040 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1040 CONTINUE + 1050 CONTINUE + ELSE + INDX = 1 + DO 1070 J = 1, N + DO 1060 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1060 CONTINUE + 1070 CONTINUE + END IF +* + SRNAMT = 'SSPEVX' + CALL SSPEVX( 'N', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVX(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1080 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1080 + END IF +* +* Do test 48 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1080 CONTINUE +* +* 6) Call SSBEV and SSBEVX. +* + IF( JTYPE.LE.7 ) THEN + KD = 1 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1100 J = 1, N + DO 1090 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1090 CONTINUE + 1100 CONTINUE + ELSE + DO 1120 J = 1, N + DO 1110 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1110 CONTINUE + 1120 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'SSBEV' + CALL SSBEV( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do tests 49 and 50 (or ... ) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1140 J = 1, N + DO 1130 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1130 CONTINUE + 1140 CONTINUE + ELSE + DO 1160 J = 1, N + DO 1150 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1150 CONTINUE + 1160 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'SSBEV_2STAGE' + CALL SSBEV_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSBEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do test 51 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1170 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1170 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 1180 CONTINUE + IF( IUPLO.EQ.1 ) THEN + DO 1200 J = 1, N + DO 1190 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1190 CONTINUE + 1200 CONTINUE + ELSE + DO 1220 J = 1, N + DO 1210 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1210 CONTINUE + 1220 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'SSBEVX' + CALL SSBEVX( 'V', 'A', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1280 + END IF + END IF +* +* Do tests 52 and 53 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDU, WA2, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1240 J = 1, N + DO 1230 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1230 CONTINUE + 1240 CONTINUE + ELSE + DO 1260 J = 1, N + DO 1250 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1250 CONTINUE + 1260 CONTINUE + END IF +* + SRNAMT = 'SSBEVX_2STAGE' + CALL SSBEVX_2STAGE( 'N', 'A', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSBEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1280 + END IF + END IF +* +* Do test 54 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1270 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA2( J ) ), ABS( WA3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA2( J )-WA3( J ) ) ) + 1270 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1280 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 1300 J = 1, N + DO 1290 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1290 CONTINUE + 1300 CONTINUE + ELSE + DO 1320 J = 1, N + DO 1310 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1310 CONTINUE + 1320 CONTINUE + END IF +* + SRNAMT = 'SSBEVX' + CALL SSBEVX( 'V', 'I', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1370 + END IF + END IF +* +* Do tests 55 and 56 (or +54) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1340 J = 1, N + DO 1330 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1330 CONTINUE + 1340 CONTINUE + ELSE + DO 1360 J = 1, N + DO 1350 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1350 CONTINUE + 1360 CONTINUE + END IF +* + SRNAMT = 'SSBEVX_2STAGE' + CALL SSBEVX_2STAGE( 'N', 'I', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSBEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1370 + END IF + END IF +* +* Do test 57 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1370 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 1390 J = 1, N + DO 1380 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1380 CONTINUE + 1390 CONTINUE + ELSE + DO 1410 J = 1, N + DO 1400 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1400 CONTINUE + 1410 CONTINUE + END IF +* + SRNAMT = 'SSBEVX' + CALL SSBEVX( 'V', 'V', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1460 + END IF + END IF +* +* Do tests 58 and 59 (or +54) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 1430 J = 1, N + DO 1420 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1420 CONTINUE + 1430 CONTINUE + ELSE + DO 1450 J = 1, N + DO 1440 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1440 CONTINUE + 1450 CONTINUE + END IF +* + SRNAMT = 'SSBEVX_2STAGE' + CALL SSBEVX_2STAGE( 'N', 'V', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, M3, WA3, + $ Z, LDU, WORK, LWORK, IWORK, IWORK( 5*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSBEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1460 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1460 + END IF +* +* Do test 60 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 1460 CONTINUE +* +* 7) Call SSYEVD +* + CALL SLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + SRNAMT = 'SSYEVD' + CALL SSYEVD( 'V', UPLO, N, A, LDU, D1, WORK, LWEDC, + $ IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1480 + END IF + END IF +* +* Do tests 61 and 62 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + SRNAMT = 'SSYEVD_2STAGE' + CALL SSYEVD_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, + $ LWORK, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1480 + END IF + END IF +* +* Do test 63 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1470 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1470 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1480 CONTINUE +* +* 8) Call SSPEVD. +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1500 J = 1, N + DO 1490 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1490 CONTINUE + 1500 CONTINUE + ELSE + INDX = 1 + DO 1520 J = 1, N + DO 1510 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1510 CONTINUE + 1520 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'SSPEVD' + CALL SSPEVD( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDX ), LWEDC-INDX+1, IWORK, LIWEDC, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1580 + END IF + END IF +* +* Do tests 64 and 65 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1540 J = 1, N + DO 1530 I = 1, J +* + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1530 CONTINUE + 1540 CONTINUE + ELSE + INDX = 1 + DO 1560 J = 1, N + DO 1550 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1550 CONTINUE + 1560 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'SSPEVD' + CALL SSPEVD( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDX ), LWEDC-INDX+1, IWORK, LIWEDC, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSPEVD(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1580 + END IF + END IF +* +* Do test 66 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1570 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1570 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + 1580 CONTINUE +* +* 9) Call SSBEVD. +* + IF( JTYPE.LE.7 ) THEN + KD = 1 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1600 J = 1, N + DO 1590 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1590 CONTINUE + 1600 CONTINUE + ELSE + DO 1620 J = 1, N + DO 1610 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1610 CONTINUE + 1620 CONTINUE + END IF +* + NTEST = NTEST + 1 + SRNAMT = 'SSBEVD' + CALL SSBEVD( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ LWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSBEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1680 + END IF + END IF +* +* Do tests 67 and 68 (or +54) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1640 J = 1, N + DO 1630 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1630 CONTINUE + 1640 CONTINUE + ELSE + DO 1660 J = 1, N + DO 1650 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1650 CONTINUE + 1660 CONTINUE + END IF +* + NTEST = NTEST + 2 + SRNAMT = 'SSBEVD_2STAGE' + CALL SSBEVD_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSBEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1680 + END IF + END IF +* +* Do test 69 (or +54) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1670 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1670 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1680 CONTINUE +* +* + CALL SLACPY( ' ', N, N, A, LDA, V, LDU ) + NTEST = NTEST + 1 + SRNAMT = 'SSYEVR' + CALL SSYEVR( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVR(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1700 + END IF + END IF +* +* Do tests 70 and 71 (or ... ) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + SRNAMT = 'SSYEVR_2STAGE' + CALL SSYEVR_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVR_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1700 + END IF + END IF +* +* Do test 72 (or ... ) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1690 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 1690 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1700 CONTINUE +* + NTEST = NTEST + 1 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVR' + CALL SSYEVR( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVR(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1710 + END IF + END IF +* +* Do tests 73 and 74 (or +54) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVR_2STAGE' + CALL SSYEVR_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVR_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1710 + END IF + END IF +* +* Do test 75 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 1710 CONTINUE +* + NTEST = NTEST + 1 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVR' + CALL SSYEVR( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ IWORK(2*N+1), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'SSYEVR(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 700 + END IF + END IF +* +* Do tests 76 and 77 (or +54) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL SSYT22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) + SRNAMT = 'SSYEVR_2STAGE' + CALL SSYEVR_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, IWORK, + $ WORK, LWORK, IWORK(2*N+1), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'SSYEVR_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 700 + END IF +* +* Do test 78 (or +54) +* + TEMP1 = SSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = SSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) +* + 1720 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST +* + CALL SLAFTS( 'SST', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) +* + 1730 CONTINUE + 1740 CONTINUE +* +* Summary +* + CALL ALASVM( 'SST', NOUNIT, NERRS, NTESTT, 0 ) +* + 9999 FORMAT( ' SDRVST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + RETURN +* +* End of SDRVST2STG +* + END diff --git a/TESTING/EIG/serrst.f b/TESTING/EIG/serrst.f index 266e9ec1..dd341aea 100644 --- a/TESTING/EIG/serrst.f +++ b/TESTING/EIG/serrst.f @@ -25,6 +25,10 @@ *> SOPGTR, SOPMTR, SSTEQR, SSTERF, SSTEBZ, SSTEIN, SPTEQR, SSBTRD, *> SSYEV, SSYEVX, SSYEVD, SSBEV, SSBEVX, SSBEVD, *> SSPEV, SSPEVX, SSPEVD, SSTEV, SSTEVX, SSTEVD, and SSTEDC. +*> SSYEVD_2STAGE, SSYEVR_2STAGE, SSYEVX_2STAGE, +*> SSYEV_2STAGE, SSBEV_2STAGE, SSBEVD_2STAGE, +*> SSBEVX_2STAGE, SSYTRD_2STAGE, SSYTRD_SY2SB, +*> SSYTRD_SB2ST *> \endverbatim * * Arguments: @@ -94,7 +98,11 @@ $ SSBEV, SSBEVD, SSBEVX, SSBTRD, SSPEV, SSPEVD, $ SSPEVX, SSPTRD, SSTEBZ, SSTEDC, SSTEIN, SSTEQR, $ SSTERF, SSTEV, SSTEVD, SSTEVR, SSTEVX, SSYEV, - $ SSYEVD, SSYEVR, SSYEVX, SSYTRD + $ SSYEVD, SSYEVR, SSYEVX, SSYTRD, + $ SSYEVD_2STAGE, SSYEVR_2STAGE, SSYEVX_2STAGE, + $ SSYEV_2STAGE, SSBEV_2STAGE, SSBEVD_2STAGE, + $ SSBEVX_2STAGE, SSYTRD_2STAGE, SSYTRD_SY2SB, + $ SSYTRD_SB2ST * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -152,6 +160,103 @@ CALL CHKXER( 'SSYTRD', INFOT, NOUT, LERR, OK ) NT = NT + 4 * +* SSYTRD_2STAGE +* + SRNAMT = 'SSYTRD_2STAGE' + INFOT = 1 + CALL SSYTRD_2STAGE( '/', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSYTRD_2STAGE( 'H', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_2STAGE( 'N', '/', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYTRD_2STAGE( 'N', 'U', -1, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYTRD_2STAGE( 'N', 'U', 2, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYTRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL SSYTRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'SSYTRD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 7 +* +* SSYTRD_SY2SB +* + SRNAMT = 'SSYTRD_SY2SB' + INFOT = 1 + CALL SSYTRD_SY2SB( '/', 0, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_SY2SB( 'U', -1, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYTRD_SY2SB( 'U', 0, -1, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYTRD_SY2SB( 'U', 2, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSYTRD_SY2SB( 'U', 0, 2, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYTRD_SY2SB( 'U', 0, 0, A, 1, C, 1, TAU, W, 0, INFO ) + CALL CHKXER( 'SSYTRD_SY2SB', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* +* SSYTRD_SB2ST +* + SRNAMT = 'SSYTRD_SB2ST' + INFOT = 1 + CALL SSYTRD_SB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_SB2ST( 'Y', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_SB2ST( 'Y', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYTRD_SB2ST( 'Y', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSYTRD_SB2ST( 'Y', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYTRD_SB2ST( 'Y', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSYTRD_SB2ST( 'Y', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSYTRD_SB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SSYTRD_SB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * SORGTR * SRNAMT = 'SORGTR' @@ -536,6 +641,44 @@ CALL CHKXER( 'SSYEVD', INFOT, NOUT, LERR, OK ) NT = NT + 10 * +* SSYEVD_2STAGE +* + SRNAMT = 'SSYEVD_2STAGE' + INFOT = 1 + CALL SSYEVD_2STAGE( '/', 'U', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSYEVD_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYEVD_2STAGE( 'N', '/', 0, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYEVD_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYEVD_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SSYEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 0, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SSYEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 4, IW, 1, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 8 +* CALL SSYEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 20, IW, 12, INFO ) +* CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, IW, 0, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 25, IW, 0, INFO ) + CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 10 +* CALL SSYEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 27, IW, 11, INFO ) +* CALL CHKXER( 'SSYEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * SSYEVR * SRNAMT = 'SSYEVR' @@ -589,6 +732,74 @@ CALL CHKXER( 'SSYEVR', INFOT, NOUT, LERR, OK ) NT = NT + 11 * +* SSYEVR_2STAGE +* + SRNAMT = 'SSYEVR_2STAGE' + N = 1 + INFOT = 1 + CALL SSYEVR_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSYEVR_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYEVR_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYEVR_2STAGE( 'N', 'A', '/', -1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSYEVR_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SSYEVR_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SSYEVR_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL SSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 0, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYEVR_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0E0, 0.0E0, 2, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL SSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 0, IW, Q, 26*N, IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL SSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 0, IW( 2*N+1 ), 10*N, + $ INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL SSYEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 1, 1, 0.0E0, + $ M, R, Z, 1, IW, Q, 26*N, IW( 2*N+1 ), 0, + $ INFO ) + CALL CHKXER( 'SSYEVR_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 12 +* * SSYEV * SRNAMT = 'SSYEV ' @@ -609,6 +820,29 @@ CALL CHKXER( 'SSYEV ', INFOT, NOUT, LERR, OK ) NT = NT + 5 * +* SSYEV_2STAGE +* + SRNAMT = 'SSYEV_2STAGE ' + INFOT = 1 + CALL SSYEV_2STAGE( '/', 'U', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSYEV_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYEV_2STAGE( 'N', '/', 0, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYEV_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYEV_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SSYEV_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, INFO ) + CALL CHKXER( 'SSYEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* * SSYEVX * SRNAMT = 'SSYEVX' @@ -661,6 +895,75 @@ CALL CHKXER( 'SSYEVX', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* SSYEVX_2STAGE +* + SRNAMT = 'SSYEVX_2STAGE' + INFOT = 1 + CALL SSYEVX_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSYEVX_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYEVX_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0E0, 1.0E0, 1, 0, 0.0E0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYEVX_2STAGE( 'N', 'A', '/', 0, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + INFOT = 4 + CALL SSYEVX_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 1, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SSYEVX_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 16, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL SSYEVX_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL SSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL SSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 2, 1, 0.0E0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYEVX_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0E0, 0.0E0, 2, 1, 0.0E0, + $ M, X, Z, 1, W, 16, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL SSYEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 1, 2, 0.0E0, + $ M, X, Z, 1, W, 8, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL SSYEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 0, W, 16, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 17 + CALL SSYEVX_2STAGE( 'N', 'A', 'U', 1, A, 1, + $ 0.0E0, 0.0E0, 0, 0, 0.0E0, + $ M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSYEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * SSPEVD * SRNAMT = 'SSPEVD' @@ -784,6 +1087,47 @@ CALL CHKXER( 'SSBTRD', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* SSYTRD_SB2ST +* + SRNAMT = 'SSYTRD_SB2ST' + INFOT = 1 + CALL SSYTRD_SB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_SB2ST( 'N', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSYTRD_SB2ST( 'N', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSYTRD_SB2ST( 'N', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSYTRD_SB2ST( 'N', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSYTRD_SB2ST( 'N', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSYTRD_SB2ST( 'N', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSYTRD_SB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SSYTRD_SB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'SSYTRD_SB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * SSBEVD * SRNAMT = 'SSBEVD' @@ -827,6 +1171,60 @@ CALL CHKXER( 'SSBEVD', INFOT, NOUT, LERR, OK ) NT = NT + 11 * +* SSBEVD_2STAGE +* + SRNAMT = 'SSBEVD_2STAGE' + INFOT = 1 + CALL SSBEVD_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSBEVD_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSBEVD_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSBEVD_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSBEVD_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, W, + $ 1, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SSBEVD_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, W, + $ 4, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL SSBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 1, W, +* $ 25, IW, 12, INFO ) +* CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, W, + $ 0, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSBEVD_2STAGE( 'N', 'U', 2, 0, A, 1, X, Z, 1, W, + $ 3, IW, 1, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 11 +* CALL SSBEVD_2STAGE( 'V', 'U', 2, 0, A, 1, X, Z, 2, W, +* $ 18, IW, 12, INFO ) +* CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SSBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, W, + $ 1, IW, 0, INFO ) + CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 13 +* CALL SSBEVD_2STAGE( 'V', 'U', 2, 0, A, 1, X, Z, 2, W, +* $ 25, IW, 11, INFO ) +* CALL CHKXER( 'SSBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* NT = NT + 12 + NT = NT + 9 +* * SSBEV * SRNAMT = 'SSBEV ' @@ -850,6 +1248,35 @@ CALL CHKXER( 'SSBEV ', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* SSBEV_2STAGE +* + SRNAMT = 'SSBEV_2STAGE ' + INFOT = 1 + CALL SSBEV_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSBEV_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSBEV_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSBEV_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSBEV_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL SSBEV_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL SSBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, Z, 0, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSBEV_2STAGE( 'N', 'U', 0, 0, A, 1, X, Z, 1, W, 0, INFO ) + CALL CHKXER( 'SSBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * SSBEVX * SRNAMT = 'SSBEVX' @@ -864,6 +1291,7 @@ INFOT = 3 CALL SSBEVX( 'N', 'A', '/', 0, 0, A, 1, Q, 1, 0.0, 0.0, 0, 0, $ 0.0, M, X, Z, 1, W, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX', INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSBEVX( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, 0.0, 0.0, 0, 0, $ 0.0, M, X, Z, 1, W, IW, I3, INFO ) @@ -905,6 +1333,72 @@ $ 0.0, M, X, Z, 1, W, IW, I3, INFO ) CALL CHKXER( 'SSBEVX', INFOT, NOUT, LERR, OK ) NT = NT + 13 +* +* SSBEVX_2STAGE +* + SRNAMT = 'SSBEVX_2STAGE' + INFOT = 1 + CALL SSBEVX_2STAGE( '/', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL SSBEVX_2STAGE( 'V', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL SSBEVX_2STAGE( 'N', '/', 'U', 0, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL SSBEVX_2STAGE( 'N', 'A', '/', 0, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL SSBEVX_2STAGE( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL SSBEVX_2STAGE( 'N', 'A', 'U', 0, -1, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL SSBEVX_2STAGE( 'N', 'A', 'U', 2, 1, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL SSBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 1, 0.0E0, +* $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 2, W, 0, IW, I3, INFO ) +* CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL SSBEVX_2STAGE( 'N', 'V', 'U', 1, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL SSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL SSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 2, 1, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SSBEVX_2STAGE( 'N', 'I', 'U', 2, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 2, 1, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL SSBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 1, 2, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 18 +* CALL SSBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 2, 0.0E0, +* $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) +* CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL SSBEVX_2STAGE( 'N', 'A', 'U', 0, 0, A, 1, Q, 1, 0.0E0, + $ 0.0E0, 0, 0, 0.0E0, M, X, Z, 1, W, 0, IW, I3, INFO ) + CALL CHKXER( 'SSBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* NT = NT + 15 + NT = NT + 13 END IF * * Print a summary line. diff --git a/TESTING/EIG/zchkee.f b/TESTING/EIG/zchkee.f index 9ca71cee..768ed7c8 100644 --- a/TESTING/EIG/zchkee.f +++ b/TESTING/EIG/zchkee.f @@ -1102,7 +1102,8 @@ $ ZDRGES, ZDRGEV, ZDRGSX, ZDRGVX, ZDRVBD, ZDRVES, $ ZDRVEV, ZDRVSG, ZDRVST, ZDRVSX, ZDRVVX, $ ZERRBD, ZERRED, ZERRGG, ZERRHS, ZERRST, ILAVER, - $ ZDRGES3, ZDRGEV3 + $ ZDRGES3, ZDRGEV3, + $ ZCHKST2STG, ZDRVST2STG, ZCHKHB2STG * .. * .. Intrinsic Functions .. INTRINSIC LEN, MIN @@ -1149,7 +1150,7 @@ PATH = LINE( 1: 3 ) NEP = LSAMEN( 3, PATH, 'NEP' ) .OR. LSAMEN( 3, PATH, 'ZHS' ) SEP = LSAMEN( 3, PATH, 'SEP' ) .OR. LSAMEN( 3, PATH, 'ZST' ) .OR. - $ LSAMEN( 3, PATH, 'ZSG' ) + $ LSAMEN( 3, PATH, 'ZSG' ) .OR. LSAMEN( 3, PATH, 'SE2' ) SVD = LSAMEN( 3, PATH, 'SVD' ) .OR. LSAMEN( 3, PATH, 'ZBD' ) ZEV = LSAMEN( 3, PATH, 'ZEV' ) ZES = LSAMEN( 3, PATH, 'ZES' ) @@ -1829,7 +1830,8 @@ $ WRITE( NOUT, FMT = 9980 )'ZCHKHS', INFO 270 CONTINUE * - ELSE IF( LSAMEN( 3, C3, 'ZST' ) .OR. LSAMEN( 3, C3, 'SEP' ) ) THEN + ELSE IF( LSAMEN( 3, C3, 'ZST' ) .OR. LSAMEN( 3, C3, 'SEP' ) + $ .OR. LSAMEN( 3, C3, 'SE2' ) ) THEN * * ---------------------------------- * SEP: Symmetric Eigenvalue Problem @@ -1859,6 +1861,17 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL ZCHKST2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), + $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), + $ DR( 1, 4 ), DR( 1, 5 ), DR( 1, 6 ), + $ DR( 1, 7 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), DR( 1, 11 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), + $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, + $ RESULT, INFO ) + ELSE CALL ZCHKST( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), $ DR( 1, 1 ), DR( 1, 2 ), DR( 1, 3 ), @@ -1868,16 +1881,26 @@ $ A( 1, 4 ), A( 1, 5 ), DC( 1, 1 ), A( 1, 6 ), $ WORK, LWORK, RWORK, LWORK, IWORK, LIWORK, $ RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'ZCHKST', INFO END IF IF( TSTDRV ) THEN + IF( LSAMEN( 3, C3, 'SE2' ) ) THEN + CALL ZDRVST2STG( NN, NVAL, 18, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ELSE CALL ZDRVST( NN, NVAL, 18, DOTYPE, ISEED, THRESH, NOUT, - $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), - $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), - $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), - $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, - $ LWORK, IWORK, LIWORK, RESULT, INFO ) + $ A( 1, 1 ), NMAX, DR( 1, 3 ), DR( 1, 4 ), + $ DR( 1, 5 ), DR( 1, 8 ), DR( 1, 9 ), + $ DR( 1, 10 ), A( 1, 2 ), NMAX, A( 1, 3 ), + $ DC( 1, 1 ), A( 1, 4 ), WORK, LWORK, RWORK, + $ LWORK, IWORK, LIWORK, RESULT, INFO ) + ENDIF IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'ZDRVST', INFO END IF @@ -1910,12 +1933,18 @@ WRITE( NOUT, FMT = 9997 )C3, NBVAL( I ), NBMIN( I ), $ NXVAL( I ) IF( TSTCHK ) THEN - CALL ZDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, - $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), - $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, - $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, - $ INFO ) +* CALL ZDRVSG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, +* $ DR( 1, 3 ), A( 1, 3 ), NMAX, A( 1, 4 ), +* $ A( 1, 5 ), A( 1, 6 ), A( 1, 7 ), WORK, +* $ LWORK, RWORK, LWORK, IWORK, LIWORK, RESULT, +* $ INFO ) + CALL ZDRVSG2STG( NN, NVAL, MAXTYP, DOTYPE, ISEED, THRESH, + $ NOUT, A( 1, 1 ), NMAX, A( 1, 2 ), NMAX, + $ DR( 1, 3 ), DR( 1, 4 ), A( 1, 3 ), NMAX, + $ A( 1, 4 ), A( 1, 5 ), A( 1, 6 ), + $ A( 1, 7 ), WORK, LWORK, RWORK, LWORK, + $ IWORK, LIWORK, RESULT, INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'ZDRVSG', INFO END IF @@ -2276,10 +2305,15 @@ CALL ALAREQ( C3, NTYPES, DOTYPE, MAXTYP, NIN, NOUT ) IF( TSTERR ) $ CALL ZERRST( 'ZHB', NOUT ) - CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, - $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), - $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, - $ INFO ) +* CALL ZCHKHB( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, THRESH, +* $ NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), DR( 1, 2 ), +* $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, +* $ INFO ) + CALL ZCHKHB2STG( NN, NVAL, NK, KVAL, MAXTYP, DOTYPE, ISEED, + $ THRESH, NOUT, A( 1, 1 ), NMAX, DR( 1, 1 ), + $ DR( 1, 2 ), DR( 1, 3 ), DR( 1, 4 ), DR( 1, 5 ), + $ A( 1, 2 ), NMAX, WORK, LWORK, RWORK, RESULT, + $ INFO ) IF( INFO.NE.0 ) $ WRITE( NOUT, FMT = 9980 )'ZCHKHB', INFO * diff --git a/TESTING/EIG/zchkhb2stg.f b/TESTING/EIG/zchkhb2stg.f new file mode 100644 index 00000000..0660b6fb --- /dev/null +++ b/TESTING/EIG/zchkhb2stg.f @@ -0,0 +1,880 @@ +*> \brief \b ZCHKHBSTG +* +* @precisions fortran z -> c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZCHKHBSTG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, +* ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, +* D2, D3, U, LDU, WORK, LWORK, RWORK RESULT, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, +* $ NWDTHS +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), KK( * ), NN( * ) +* DOUBLE PRECISION RESULT( * ), RWORK( * ), SD( * ), SE( * ) +* COMPLEX*16 A( LDA, * ), U( LDU, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCHKHBSTG tests the reduction of a Hermitian band matrix to tridiagonal +*> from, used with the Hermitian eigenvalue problem. +*> +*> ZHBTRD factors a Hermitian band matrix A as U S U* , where * means +*> conjugate transpose, S is symmetric tridiagonal, and U is unitary. +*> ZHBTRD can use either just the lower or just the upper triangle +*> of A; ZCHKHBSTG checks both cases. +*> +*> ZHETRD_HB2ST factors a Hermitian band matrix A as U S U* , +*> where * means conjugate transpose, S is symmetric tridiagonal, and U is +*> unitary. ZHETRD_HB2ST can use either just the lower or just +*> the upper triangle of A; ZCHKHBSTG checks both cases. +*> +*> DSTEQR factors S as Z D1 Z'. +*> D1 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSBTRD "U" (used as reference for DSYTRD_SB2ST) +*> D2 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "U". +*> D3 is the matrix of eigenvalues computed when Z is not computed +*> and from the S resulting of DSYTRD_SB2ST "L". +*> +*> When ZCHKHBSTG is called, a number of matrix "sizes" ("n's"), a number +*> of bandwidths ("k's"), and a number of matrix "types" are +*> specified. For each size ("n"), each bandwidth ("k") less than or +*> equal to "n", and each type of matrix, one matrix will be generated +*> and used to test the hermitian banded reduction routine. For each +*> matrix, a number of tests will be performed: +*> +*> (1) | A - V S V* | / ( |A| n ulp ) computed by ZHBTRD with +*> UPLO='U' +*> +*> (2) | I - UU* | / ( n ulp ) +*> +*> (3) | A - V S V* | / ( |A| n ulp ) computed by ZHBTRD with +*> UPLO='L' +*> +*> (4) | I - UU* | / ( n ulp ) +*> +*> (5) | D1 - D2 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D2 is computed by +*> ZHETRD_HB2ST with UPLO='U' +*> +*> (6) | D1 - D3 | / ( |D1| ulp ) where D1 is computed by +*> DSBTRD with UPLO='U' and +*> D3 is computed by +*> ZHETRD_HB2ST with UPLO='L' +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> ZCHKHBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NWDTHS +*> \verbatim +*> NWDTHS is INTEGER +*> The number of bandwidths to use. If it is zero, +*> ZCHKHBSTG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] KK +*> \verbatim +*> KK is INTEGER array, dimension (NWDTHS) +*> An array containing the bandwidths to be used for the band +*> matrices. The values must be at least zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, ZCHKHBSTG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to ZCHKHBSTG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA, max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least 2 (not 1!) +*> and at least max( KK )+1. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is DOUBLE PRECISION array, dimension (max(NN)) +*> Used to hold the diagonal of the tridiagonal matrix computed +*> by ZHBTRD. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is DOUBLE PRECISION array, dimension (max(NN)) +*> Used to hold the off-diagonal of the tridiagonal matrix +*> computed by ZHBTRD. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU, max(NN)) +*> Used to hold the unitary matrix computed by ZHBTRD. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> max( LDA+1, max(NN)+1 )*max(NN). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (4) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZCHKHB2STG( NSIZES, NN, NWDTHS, KK, NTYPES, DOTYPE, + $ ISEED, THRESH, NOUNIT, A, LDA, SD, SE, D1, + $ D2, D3, U, LDU, WORK, LWORK, RWORK, RESULT, + $ INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LWORK, NOUNIT, NSIZES, NTYPES, + $ NWDTHS + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), KK( * ), NN( * ) + DOUBLE PRECISION RESULT( * ), RWORK( * ), SD( * ), SE( * ), + $ D1( * ), D2( * ), D3( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ TEN = 10.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 15 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, BADNNB + INTEGER I, IINFO, IMODE, ITYPE, J, JC, JCOL, JR, JSIZE, + $ JTYPE, JWIDTH, K, KMAX, MTYPES, N, NERRS, + $ NMATS, NMAX, NTEST, NTESTT + DOUBLE PRECISION ANINV, ANORM, COND, OVFL, RTOVFL, RTUNFL, + $ TEMP1, TEMP2, TEMP3, TEMP4, ULP, ULPINV, UNFL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), KMAGN( MAXTYP ), + $ KMODE( MAXTYP ), KTYPE( MAXTYP ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLASUM, XERBLA, ZHBT21, ZHBTRD, ZLACPY, ZLASET, + $ ZLATMR, ZLATMS, ZHBTRD_HB2ST, ZSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0 / +* .. +* .. Executable Statements .. +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + BADNNB = .FALSE. + KMAX = 0 + DO 20 J = 1, NSIZES + KMAX = MAX( KMAX, KK( J ) ) + IF( KK( J ).LT.0 ) + $ BADNNB = .TRUE. + 20 CONTINUE + KMAX = MIN( NMAX-1, KMAX ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NWDTHS.LT.0 ) THEN + INFO = -3 + ELSE IF( BADNNB ) THEN + INFO = -4 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.KMAX+1 ) THEN + INFO = -11 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -15 + ELSE IF( ( MAX( LDA, NMAX )+1 )*NMAX.GT.LWORK ) THEN + INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZCHKHBSTG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 .OR. NWDTHS.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 190 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + DO 180 JWIDTH = 1, NWDTHS + K = KK( JWIDTH ) + IF( K.GT.N ) + $ GO TO 180 + K = MAX( 0, MIN( N-1, K ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 170 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 170 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A". +* Store as "Upper"; later, we will copy to other format. +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( K+1, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, + $ COND, ANORM, 0, 0, 'Q', A( K+1, 1 ), LDA, + $ WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, + $ COND, ANORM, K, K, 'Q', A, LDA, WORK, + $ IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, + $ CONE, 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'Q', A( K+1, 1 ), LDA, + $ IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, + $ CONE, 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, K, K, + $ ZERO, ANORM, 'Q', A, LDA, IDUMMA, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL ZLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, + $ COND, ANORM, K, K, 'Q', A, LDA, + $ WORK( N+1 ), IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + IF( N.GT.1 ) + $ K = MAX( 1, K ) + CALL ZLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, + $ COND, ANORM, 1, 1, 'Q', A( K, 1 ), LDA, + $ WORK, IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( K, I ) ) / + $ SQRT( ABS( A( K+1, I-1 )*A( K+1, I ) ) ) + IF( TEMP1.GT.HALF ) THEN + A( K, I ) = HALF*SQRT( ABS( A( K+1, + $ I-1 )*A( K+1, I ) ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call ZHBTRD to compute S and U from upper triangle. +* + CALL ZLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 1 + CALL ZHBTRD( 'V', 'U', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBTRD(U)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL ZHBT21( 'Upper', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RWORK, RESULT( 1 ) ) +* +* Before converting A into lower for DSBTRD, run DSYTRD_SB2ST +* otherwise matrix A will be converted to lower and then need +* to be converted back to upper in order to run the upper case +* ofDSYTRD_SB2ST +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the DSBTRD and used as reference to compare +* with the DSYTRD_SB2ST routine +* +* Compute D1 from the DSBTRD and used as reference for the +* DSYTRD_SB2ST +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL ZSTEQR( 'N', N, D1, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* DSYTRD_SB2ST Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL ZLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL ZHETRD_HB2ST( 'N', 'N', "U", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the DSYTRD_SB2ST Upper case +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL ZSTEQR( 'N', N, D2, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Convert A from Upper-Triangle-Only storage to +* Lower-Triangle-Only storage. +* + DO 120 JC = 1, N + DO 110 JR = 0, MIN( K, N-JC ) + A( JR+1, JC ) = DCONJG( A( K+1-JR, JC+JR ) ) + 110 CONTINUE + 120 CONTINUE + DO 140 JC = N + 1 - K, N + DO 130 JR = MIN( K, N-JC ) + 1, K + A( JR+1, JC ) = ZERO + 130 CONTINUE + 140 CONTINUE +* +* Call ZHBTRD to compute S and U from lower triangle +* + CALL ZLACPY( ' ', K+1, N, A, LDA, WORK, LDA ) +* + NTEST = 3 + CALL ZHBTRD( 'V', 'L', N, K, WORK, LDA, SD, SE, U, LDU, + $ WORK( LDA*N+1 ), IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBTRD(L)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 150 + END IF + END IF + NTEST = 4 +* +* Do tests 3 and 4 +* + CALL ZHBT21( 'Lower', N, K, 1, A, LDA, SD, SE, U, LDU, + $ WORK, RWORK, RESULT( 3 ) ) +* +* DSYTRD_SB2ST Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the DSBTRD. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL ZLACPY( ' ', K+1, N, A, LDA, U, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL ZHETRD_HB2ST( 'N', 'N', "L", N, K, U, LDU, SD, SE, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL ZSTEQR( 'N', N, D3, RWORK, WORK, LDU, + $ RWORK( N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 150 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 6 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT(5) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT(6) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* End of Loop -- Check for RESULT(j) > THRESH +* + 150 CONTINUE + NTESTT = NTESTT + NTEST +* +* Print out tests which fail. +* + DO 160 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZHB' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Hermitian' + WRITE( NOUNIT, FMT = 9994 )'unitary', '*', + $ 'conjugate transpose', ( '*', J = 1, 6 ) + END IF + NERRS = NERRS + 1 + WRITE( NOUNIT, FMT = 9993 )N, K, IOLDSD, JTYPE, + $ JR, RESULT( JR ) + END IF + 160 CONTINUE +* + 170 CONTINUE + 180 CONTINUE + 190 CONTINUE +* +* Summary +* + CALL DLASUM( 'ZHB', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' ZCHKHBSTG: ', A, ' returned INFO=', I6, '.', / 9X, 'N=', + $ I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + 9998 FORMAT( / 1X, A3, + $ ' -- Complex Hermitian Banded Tridiagonal Reduction Routines' + $ ) + 9997 FORMAT( ' Matrix types (see DCHK23 for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Banded Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) +* + 9994 FORMAT( / ' Tests performed: (S is Tridiag, U is ', A, ',', + $ / 20X, A, ' means ', A, '.', / ' UPLO=''U'':', + $ / ' 1= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 2= | I - U U', A1, ' | / ( n ulp )', / ' UPLO=''L'':', + $ / ' 3= | A - U S U', A1, ' | / ( |A| n ulp ) ', + $ ' 4= | I - U U', A1, ' | / ( n ulp )' / ' Eig check:', + $ /' 5= | D1 - D2', '', ' | / ( |D1| ulp ) ', + $ ' 6= | D1 - D3', '', ' | / ( |D1| ulp ) ' ) + 9993 FORMAT( ' N=', I5, ', K=', I4, ', seed=', 4( I4, ',' ), ' type ', + $ I2, ', test(', I2, ')=', G10.3 ) +* +* End of ZCHKHBSTG +* + END diff --git a/TESTING/EIG/zchkst2stg.f b/TESTING/EIG/zchkst2stg.f new file mode 100644 index 00000000..a1aaffbc --- /dev/null +++ b/TESTING/EIG/zchkst2stg.f @@ -0,0 +1,2145 @@ +*> \brief \b ZCHKST2STG +* +* @precisions fortran z -> c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, +* WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, RESULT, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, +* $ NSIZES, NTYPES +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION D1( * ), D2( * ), D3( * ), D4( * ), D5( * ), +* $ RESULT( * ), RWORK( * ), SD( * ), SE( * ), +* $ WA1( * ), WA2( * ), WA3( * ), WR( * ) +* COMPLEX*16 A( LDA, * ), AP( * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), VP( * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCHKST2STG checks the Hermitian eigenvalue problem routines +*> using the 2-stage reduction techniques. Since the generation +*> of Q or the vectors is not available in this release, we only +*> compare the eigenvalue resulting when using the 2-stage to the +*> one considered as reference using the standard 1-stage reduction +*> ZHETRD. For that, we call the standard ZHETRD and compute D1 using +*> DSTEQR, then we call the 2-stage ZHETRD_2STAGE with Upper and Lower +*> and we compute D2 and D3 using DSTEQR and then we replaced tests +*> 3 and 4 by tests 11 and 12. test 1 and 2 remain to verify that +*> the 1-stage results are OK and can be trusted. +*> This testing routine will converge to the ZCHKST in the next +*> release when vectors and generation of Q will be implemented. +*> +*> ZHETRD factors A as U S U* , where * means conjugate transpose, +*> S is real symmetric tridiagonal, and U is unitary. +*> ZHETRD can use either just the lower or just the upper triangle +*> of A; ZCHKST2STG checks both cases. +*> U is represented as a product of Householder +*> transformations, whose vectors are stored in the first +*> n-1 columns of V, and whose scale factors are in TAU. +*> +*> ZHPTRD does the same as ZHETRD, except that A and V are stored +*> in "packed" format. +*> +*> ZUNGTR constructs the matrix U from the contents of V and TAU. +*> +*> ZUPGTR constructs the matrix U from the contents of VP and TAU. +*> +*> ZSTEQR factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal. D2 is the matrix of +*> eigenvalues computed when Z is not computed. +*> +*> DSTERF computes D3, the matrix of eigenvalues, by the +*> PWK method, which does not yield eigenvectors. +*> +*> ZPTEQR factors S as Z4 D4 Z4* , for a +*> Hermitian positive definite tridiagonal matrix. +*> D5 is the matrix of eigenvalues computed when Z is not +*> computed. +*> +*> DSTEBZ computes selected eigenvalues. WA1, WA2, and +*> WA3 will denote eigenvalues computed to high +*> absolute accuracy, with different range options. +*> WR will denote eigenvalues computed to high relative +*> accuracy. +*> +*> ZSTEIN computes Y, the eigenvectors of S, given the +*> eigenvalues. +*> +*> ZSTEDC factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). It may also +*> update an input unitary matrix, usually the output +*> from ZHETRD/ZUNGTR or ZHPTRD/ZUPGTR ('V' option). It may +*> also just compute eigenvalues ('N' option). +*> +*> ZSTEMR factors S as Z D1 Z* , where Z is the unitary +*> matrix of eigenvectors and D1 is a diagonal matrix with +*> the eigenvalues on the diagonal ('I' option). ZSTEMR +*> uses the Relatively Robust Representation whenever possible. +*> +*> When ZCHKST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the Hermitian eigenroutines. For each matrix, a number +*> of tests will be performed: +*> +*> (1) | A - V S V* | / ( |A| n ulp ) ZHETRD( UPLO='U', ... ) +*> +*> (2) | I - UV* | / ( n ulp ) ZUNGTR( UPLO='U', ... ) +*> +*> (3) | A - V S V* | / ( |A| n ulp ) ZHETRD( UPLO='L', ... ) +*> replaced by | D1 - D2 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D2 is the +*> eigenvalue matrix computed using S_2stage the output of +*> ZHETRD_2STAGE("N", "U",....). D1 and D2 are computed +*> via DSTEQR('N',...) +*> +*> (4) | I - UV* | / ( n ulp ) ZUNGTR( UPLO='L', ... ) +*> replaced by | D1 - D3 | / ( |D1| ulp ) where D1 is the +*> eigenvalue matrix computed using S and D3 is the +*> eigenvalue matrix computed using S_2stage the output of +*> ZHETRD_2STAGE("N", "L",....). D1 and D3 are computed +*> via DSTEQR('N',...) +*> +*> (5-8) Same as 1-4, but for ZHPTRD and ZUPGTR. +*> +*> (9) | S - Z D Z* | / ( |S| n ulp ) ZSTEQR('V',...) +*> +*> (10) | I - ZZ* | / ( n ulp ) ZSTEQR('V',...) +*> +*> (11) | D1 - D2 | / ( |D1| ulp ) ZSTEQR('N',...) +*> +*> (12) | D1 - D3 | / ( |D1| ulp ) DSTERF +*> +*> (13) 0 if the true eigenvalues (computed by sturm count) +*> of S are within THRESH of +*> those in D1. 2*THRESH if they are not. (Tested using +*> DSTECH) +*> +*> For S positive definite, +*> +*> (14) | S - Z4 D4 Z4* | / ( |S| n ulp ) ZPTEQR('V',...) +*> +*> (15) | I - Z4 Z4* | / ( n ulp ) ZPTEQR('V',...) +*> +*> (16) | D4 - D5 | / ( 100 |D4| ulp ) ZPTEQR('N',...) +*> +*> When S is also diagonally dominant by the factor gamma < 1, +*> +*> (17) max | D4(i) - WR(i) | / ( |D4(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> DSTEBZ( 'A', 'E', ...) +*> +*> (18) | WA1 - D3 | / ( |D3| ulp ) DSTEBZ( 'A', 'E', ...) +*> +*> (19) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> DSTEBZ( 'I', 'E', ...) +*> +*> (20) | S - Y WA1 Y* | / ( |S| n ulp ) DSTEBZ, ZSTEIN +*> +*> (21) | I - Y Y* | / ( n ulp ) DSTEBZ, ZSTEIN +*> +*> (22) | S - Z D Z* | / ( |S| n ulp ) ZSTEDC('I') +*> +*> (23) | I - ZZ* | / ( n ulp ) ZSTEDC('I') +*> +*> (24) | S - Z D Z* | / ( |S| n ulp ) ZSTEDC('V') +*> +*> (25) | I - ZZ* | / ( n ulp ) ZSTEDC('V') +*> +*> (26) | D1 - D2 | / ( |D1| ulp ) ZSTEDC('V') and +*> ZSTEDC('N') +*> +*> Test 27 is disabled at the moment because ZSTEMR does not +*> guarantee high relatvie accuracy. +*> +*> (27) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> ZSTEMR('V', 'A') +*> +*> (28) max | D6(i) - WR(i) | / ( |D6(i)| omega ) , +*> i +*> omega = 2 (2n-1) ULP (1 + 8 gamma**2) / (1 - gamma)**4 +*> ZSTEMR('V', 'I') +*> +*> Tests 29 through 34 are disable at present because ZSTEMR +*> does not handle partial specturm requests. +*> +*> (29) | S - Z D Z* | / ( |S| n ulp ) ZSTEMR('V', 'I') +*> +*> (30) | I - ZZ* | / ( n ulp ) ZSTEMR('V', 'I') +*> +*> (31) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> ZSTEMR('N', 'I') vs. CSTEMR('V', 'I') +*> +*> (32) | S - Z D Z* | / ( |S| n ulp ) ZSTEMR('V', 'V') +*> +*> (33) | I - ZZ* | / ( n ulp ) ZSTEMR('V', 'V') +*> +*> (34) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> ZSTEMR('N', 'V') vs. CSTEMR('V', 'V') +*> +*> (35) | S - Z D Z* | / ( |S| n ulp ) ZSTEMR('V', 'A') +*> +*> (36) | I - ZZ* | / ( n ulp ) ZSTEMR('V', 'A') +*> +*> (37) ( max { min | WA2(i)-WA3(j) | } + +*> i j +*> max { min | WA3(i)-WA2(j) | } ) / ( |D3| ulp ) +*> i j +*> ZSTEMR('N', 'A') vs. CSTEMR('V', 'A') +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) Same as (8), but diagonal elements are all positive. +*> (17) Same as (9), but diagonal elements are all positive. +*> (18) Same as (10), but diagonal elements are all positive. +*> (19) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (20) Same as (16), but multiplied by SQRT( underflow threshold ) +*> (21) A diagonally dominant tridiagonal matrix with geometrically +*> spaced diagonal entries 1, ..., ULP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NSIZES +*> \verbatim +*> NSIZES is INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> ZCHKST2STG does nothing. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NN +*> \verbatim +*> NN is INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] NTYPES +*> \verbatim +*> NTYPES is INTEGER +*> The number of elements in DOTYPE. If it is zero, ZCHKST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> \endverbatim +*> +*> \param[in] DOTYPE +*> \verbatim +*> DOTYPE is LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to ZCHKST2STG to continue the same random number +*> sequence. +*> \endverbatim +*> +*> \param[in] THRESH +*> \verbatim +*> THRESH is DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> \endverbatim +*> +*> \param[in] NOUNIT +*> \verbatim +*> NOUNIT is INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array of +*> dimension ( LDA , max(NN) ) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX*16 array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix A stored in packed format. +*> \endverbatim +*> +*> \param[out] SD +*> \verbatim +*> SD is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The diagonal of the tridiagonal matrix computed by ZHETRD. +*> On exit, SD and SE contain the tridiagonal form of the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] SE +*> \verbatim +*> SE is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The off-diagonal of the tridiagonal matrix computed by +*> ZHETRD. On exit, SD and SE contain the tridiagonal form of +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D1 +*> \verbatim +*> D1 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by ZSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> \endverbatim +*> +*> \param[out] D2 +*> \verbatim +*> D2 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by ZSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> \endverbatim +*> +*> \param[out] D3 +*> \verbatim +*> D3 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by DSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D4 +*> \verbatim +*> D4 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by ZPTEQR(V). +*> ZPTEQR factors S as Z4 D4 Z4* +*> On exit, the eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] D5 +*> \verbatim +*> D5 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> The eigenvalues of A, as computed by ZPTEQR(N) +*> when Z is not computed. On exit, the +*> eigenvalues in D4 correspond with the matrix in A. +*> \endverbatim +*> +*> \param[out] WA1 +*> \verbatim +*> WA1 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> \endverbatim +*> +*> \param[out] WA2 +*> \verbatim +*> WA2 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> Choose random values for IL and IU, and ask for the +*> IL-th through IU-th eigenvalues. +*> \endverbatim +*> +*> \param[out] WA3 +*> \verbatim +*> WA3 is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> Selected eigenvalues of A, computed to high +*> absolute accuracy, with different range options. +*> as computed by DSTEBZ. +*> Determine the values VL and VU of the IL-th and IU-th +*> eigenvalues and ask for all eigenvalues in this range. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array of +*> dimension( max(NN) ) +*> All eigenvalues of A, computed to high +*> absolute accuracy, with different options. +*> as computed by DSTEBZ. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array of +*> dimension( LDU, max(NN) ). +*> The unitary matrix computed by ZHETRD + ZUNGTR. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of U, Z, and V. It must be at least 1 +*> and at least max( NN ). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array of +*> dimension( LDU, max(NN) ). +*> The Housholder vectors computed by ZHETRD in reducing A to +*> tridiagonal form. The vectors computed with UPLO='U' are +*> in the upper triangle, and the vectors computed with UPLO='L' +*> are in the lower triangle. (As described in ZHETRD, the +*> sub- and superdiagonal are not set to 1, although the +*> true Householder vector has a 1 in that position. The +*> routines that use V, such as ZUNGTR, set those entries to +*> 1 before using them, and then restore them later.) +*> \endverbatim +*> +*> \param[out] VP +*> \verbatim +*> VP is COMPLEX*16 array of +*> dimension( max(NN)*max(NN+1)/2 ) +*> The matrix V stored in packed format. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array of +*> dimension( max(NN) ) +*> The Householder factors computed by ZHETRD in reducing A +*> to tridiagonal form. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array of +*> dimension( LDU, max(NN) ). +*> The unitary matrix of eigenvectors computed by ZSTEQR, +*> ZPTEQR, and ZSTEIN. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array of +*> dimension( LWORK ) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The number of entries in WORK. This must be at least +*> 1 + 4 * Nmax + 2 * Nmax * lg Nmax + 3 * Nmax**2 +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> Workspace. +*> \endverbatim +*> +*> \param[out] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The number of entries in IWORK. This must be at least +*> 6 + 6*Nmax + 5 * Nmax * lg Nmax +*> where Nmax = max( NN(j), 2 ) and lg = log base 2. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The number of entries in LRWORK (dimension( ??? ) +*> \endverbatim +*> +*> \param[out] RESULT +*> \verbatim +*> RESULT is DOUBLE PRECISION array, dimension (26) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -23: LDU < 1 or LDU < NMAX. +*> -29: LWORK too small. +*> If ZLATMR, CLATMS, ZHETRD, ZUNGTR, ZSTEQR, DSTERF, +*> or ZUNMC2 returns an error code, the +*> absolute value of it is returned. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NBLOCK Blocksize as returned by ENVIR. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far. +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZCHKST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, AP, SD, SE, D1, D2, D3, D4, D5, + $ WA1, WA2, WA3, WR, U, LDU, V, VP, TAU, Z, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, RESULT, + $ INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, + $ NSIZES, NTYPES + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION D1( * ), D2( * ), D3( * ), D4( * ), D5( * ), + $ RESULT( * ), RWORK( * ), SD( * ), SE( * ), + $ WA1( * ), WA2( * ), WA3( * ), WR( * ) + COMPLEX*16 A( LDA, * ), AP( * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), VP( * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT, TEN, HUN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ EIGHT = 8.0D0, TEN = 10.0D0, HUN = 100.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) + LOGICAL CRANGE + PARAMETER ( CRANGE = .FALSE. ) + LOGICAL CREL + PARAMETER ( CREL = .FALSE. ) +* .. +* .. Local Scalars .. + LOGICAL BADNN, TRYRAC + INTEGER I, IINFO, IL, IMODE, INDE, INDRWK, ITEMP, + $ ITYPE, IU, J, JC, JR, JSIZE, JTYPE, LGN, + $ LIWEDC, LOG2UI, LRWEDC, LWEDC, M, M2, M3, + $ MTYPES, N, NAP, NBLOCK, NERRS, NMATS, NMAX, + $ NSPLIT, NTEST, NTESTT, LH, LW + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, TEMP4, ULP, + $ ULPINV, UNFL, VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) + DOUBLE PRECISION DUMMA( 1 ) +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLARND, DSXT1 + EXTERNAL ILAENV, DLAMCH, DLARND, DSXT1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLABAD, DLASUM, DSTEBZ, DSTECH, DSTERF, + $ XERBLA, ZCOPY, ZHET21, ZHETRD, ZHPT21, ZHPTRD, + $ ZLACPY, ZLASET, ZLATMR, ZLATMS, ZPTEQR, ZSTEDC, + $ ZSTEMR, ZSTEIN, ZSTEQR, ZSTT21, ZSTT22, ZUNGTR, + $ ZUPGTR, ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 8, + $ 8, 8, 9, 9, 9, 9, 9, 10 / + DATA KMAGN / 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 1, 1, 2, 3, 1 / + DATA KMODE / 0, 0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 3, 1, 4, 4, 3 / +* .. +* .. Executable Statements .. +* +* Keep ftnchek happy + IDUMMA( 1 ) = 1 +* +* Check for errors +* + NTESTT = 0 + INFO = 0 +* +* Important constants +* + BADNN = .FALSE. + TRYRAC = .TRUE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* + NBLOCK = ILAENV( 1, 'ZHETRD', 'L', NMAX, -1, -1, -1 ) + NBLOCK = MIN( NMAX, MAX( 1, NBLOCK ) ) +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -23 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -29 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZCHKST2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + LOG2UI = INT( LOG( ULPINV ) / LOG( TWO ) ) + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE + NERRS = 0 + NMATS = 0 +* + DO 310 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = 1 + 4*N + 2*N*LGN + 4*N**2 + LRWEDC = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWEDC = 6 + 6*N + 5*N*LGN + ELSE + LWEDC = 8 + LRWEDC = 7 + LIWEDC = 12 + END IF + NAP = ( N*( N+1 ) ) / 2 + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 300 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 300 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log Hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random Hermitian +* =9 positive definite +* =10 diagonally dominant tridiagonal +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 100 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + IF( JTYPE.LE.15 ) THEN + COND = ULPINV + ELSE + COND = ULPINV*ANINV / TEN + END IF +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JC = 1, N + A( JC, JC ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Positive definite, eigenvalues specified. +* + CALL ZLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.10 ) THEN +* +* Positive definite tridiagonal, eigenvalues specified. +* + CALL ZLATMS( N, N, 'S', ISEED, 'P', RWORK, IMODE, COND, + $ ANORM, 1, 1, 'N', A, LDA, WORK, IINFO ) + DO 90 I = 2, N + TEMP1 = ABS( A( I-1, I ) ) + TEMP2 = SQRT( ABS( A( I-1, I-1 )*A( I, I ) ) ) + IF( TEMP1.GT.HALF*TEMP2 ) THEN + A( I-1, I ) = A( I-1, I )* + $ ( HALF*TEMP2 / ( UNFL+TEMP1 ) ) + A( I, I-1 ) = DCONJG( A( I-1, I ) ) + END IF + 90 CONTINUE +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 100 CONTINUE +* +* Call ZHETRD and ZUNGTR to compute S and U from +* upper triangle. +* + CALL ZLACPY( 'U', N, N, A, LDA, V, LDU ) +* + NTEST = 1 + CALL ZHETRD( 'U', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHETRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 1 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL ZLACPY( 'U', N, N, V, LDU, U, LDU ) +* + NTEST = 2 + CALL ZUNGTR( 'U', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZUNGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 2 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 1 and 2 +* + CALL ZHET21( 2, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 1 ) ) + CALL ZHET21( 3, 'Upper', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 2 ) ) +* +* Compute D1 the eigenvalues resulting from the tridiagonal +* form using the standard 1-stage algorithm and use it as a +* reference to compare with the 2-stage technique +* +* Compute D1 from the 1-stage and used as reference for the +* 2-stage +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + CALL ZSTEQR( 'N', N, D1, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Upper case is used to compute D2. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL ZLACPY( 'U', N, N, A, LDA, V, LDU ) + LH = MAX(1, 4*N) + LW = LWORK - LH + CALL ZHETRD_2STAGE( 'N', "U", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D2 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 3 + CALL ZSTEQR( 'N', N, D2, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* +* 2-STAGE TRD Lower case is used to compute D3. +* Note to set SD and SE to zero to be sure not reusing +* the one from above. Compare it with D1 computed +* using the 1-stage. +* + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SD, 1 ) + CALL DLASET( 'Full', N, 1, ZERO, ZERO, SE, 1 ) + CALL ZLACPY( 'L', N, N, A, LDA, V, LDU ) + CALL ZHETRD_2STAGE( 'N', "L", N, V, LDU, SD, SE, TAU, + $ WORK, LH, WORK( LH+1 ), LW, IINFO ) +* +* Compute D3 from the 2-stage Upper case +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 4 + CALL ZSTEQR( 'N', N, D3, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* +* +* Do Tests 3 and 4 which are similar to 11 and 12 but with the +* D1 computed using the standard 1-stage reduction as reference +* + NTEST = 4 + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 151 CONTINUE +* + RESULT( 3 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 4 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Skip the DSYTRD for lower that since we replaced its testing +* 3 and 4 by the 2-stage one. + GOTO 101 +* +* Call ZHETRD and ZUNGTR to compute S and U from +* lower triangle, do tests. +* + CALL ZLACPY( 'L', N, N, A, LDA, V, LDU ) +* + NTEST = 3 + CALL ZHETRD( 'L', N, V, LDU, SD, SE, TAU, WORK, LWORK, + $ IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHETRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 3 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL ZLACPY( 'L', N, N, V, LDU, U, LDU ) +* + NTEST = 4 + CALL ZUNGTR( 'L', N, U, LDU, TAU, WORK, LWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZUNGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 4 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL ZHET21( 2, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 3 ) ) + CALL ZHET21( 3, 'Lower', N, 1, A, LDA, SD, SE, U, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( 4 ) ) +* +*after skipping old tests 3 4 back to the normal +* + 101 CONTINUE +* +* Store the upper triangle of A in AP +* + I = 0 + DO 120 JC = 1, N + DO 110 JR = 1, JC + I = I + 1 + AP( I ) = A( JR, JC ) + 110 CONTINUE + 120 CONTINUE +* +* Call ZHPTRD and ZUPGTR to compute S and U from AP +* + CALL ZCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 5 + CALL ZHPTRD( 'U', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPTRD(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 5 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 6 + CALL ZUPGTR( 'U', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZUPGTR(U)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 6 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 5 and 6 +* + CALL ZHPT21( 2, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 5 ) ) + CALL ZHPT21( 3, 'Upper', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 6 ) ) +* +* Store the lower triangle of A in AP +* + I = 0 + DO 140 JC = 1, N + DO 130 JR = JC, N + I = I + 1 + AP( I ) = A( JR, JC ) + 130 CONTINUE + 140 CONTINUE +* +* Call ZHPTRD and ZUPGTR to compute S and U from AP +* + CALL ZCOPY( NAP, AP, 1, VP, 1 ) +* + NTEST = 7 + CALL ZHPTRD( 'L', N, VP, SD, SE, TAU, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPTRD(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 7 ) = ULPINV + GO TO 280 + END IF + END IF +* + NTEST = 8 + CALL ZUPGTR( 'L', N, VP, TAU, U, LDU, WORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZUPGTR(L)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 8 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL ZHPT21( 2, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 7 ) ) + CALL ZHPT21( 3, 'Lower', N, 1, AP, SD, SE, U, LDU, VP, TAU, + $ WORK, RWORK, RESULT( 8 ) ) +* +* Call ZSTEQR to compute D1, D2, and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 9 + CALL ZSTEQR( 'V', N, D1, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 9 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 11 + CALL ZSTEQR( 'N', N, D2, RWORK, WORK, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEQR(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 11 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Compute D3 (using PWK method) +* + CALL DCOPY( N, SD, 1, D3, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 12 + CALL DSTERF( N, D3, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTERF', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 12 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 9 and 10 +* + CALL ZSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 9 ) ) +* +* Do Tests 11 and 12 +* + TEMP1 = ZERO + TEMP2 = ZERO + TEMP3 = ZERO + TEMP4 = ZERO +* + DO 150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + TEMP3 = MAX( TEMP3, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP4 = MAX( TEMP4, ABS( D1( J )-D3( J ) ) ) + 150 CONTINUE +* + RESULT( 11 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) + RESULT( 12 ) = TEMP4 / MAX( UNFL, ULP*MAX( TEMP3, TEMP4 ) ) +* +* Do Test 13 -- Sturm Sequence Test of Eigenvalues +* Go up by factors of two until it succeeds +* + NTEST = 13 + TEMP1 = THRESH*( HALF-ULP ) +* + DO 160 J = 0, LOG2UI + CALL DSTECH( N, SD, SE, D1, TEMP1, RWORK, IINFO ) + IF( IINFO.EQ.0 ) + $ GO TO 170 + TEMP1 = TEMP1*TWO + 160 CONTINUE +* + 170 CONTINUE + RESULT( 13 ) = TEMP1 +* +* For positive definite matrices ( JTYPE.GT.15 ) call ZPTEQR +* and do tests 14, 15, and 16 . +* + IF( JTYPE.GT.15 ) THEN +* +* Compute D4 and Z4 +* + CALL DCOPY( N, SD, 1, D4, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 14 + CALL ZPTEQR( 'V', N, D4, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZPTEQR(V)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 14 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 14 and 15 +* + CALL ZSTT21( N, 0, SD, SE, D4, DUMMA, Z, LDU, WORK, + $ RWORK, RESULT( 14 ) ) +* +* Compute D5 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 16 + CALL ZPTEQR( 'N', N, D5, RWORK, Z, LDU, RWORK( N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZPTEQR(N)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 16 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 16 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 180 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J ) ), ABS( D5( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D4( J )-D5( J ) ) ) + 180 CONTINUE +* + RESULT( 16 ) = TEMP2 / MAX( UNFL, + $ HUN*ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 14 ) = ZERO + RESULT( 15 ) = ZERO + RESULT( 16 ) = ZERO + END IF +* +* Call DSTEBZ with different options and do tests 17-18. +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 ) THEN + NTEST = 17 + ABSTOL = UNFL + UNFL + CALL DSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M, NSPLIT, WR, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A,rel)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 17 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 17 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 190 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 190 CONTINUE +* + RESULT( 17 ) = TEMP1 / TEMP2 + ELSE + RESULT( 17 ) = ZERO + END IF +* +* Now ask for all eigenvalues with high absolute accuracy. +* + NTEST = 18 + ABSTOL = UNFL + UNFL + CALL DSTEBZ( 'A', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), RWORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 18 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do test 18 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 200 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D3( J ) ), ABS( WA1( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D3( J )-WA1( J ) ) ) + 200 CONTINUE +* + RESULT( 18 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Choose random values for IL and IU, and ask for the +* IL-th through IU-th eigenvalues. +* + NTEST = 19 + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + END IF +* + CALL DSTEBZ( 'I', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M2, NSPLIT, WA2, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Determine the values VL and VU of the IL-th and IU-th +* eigenvalues and ask for all eigenvalues in this range. +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = WA1( IL ) - MAX( HALF*( WA1( IL )-WA1( IL-1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VL = WA1( 1 ) - MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = WA1( IU ) + MAX( HALF*( WA1( IU+1 )-WA1( IU ) ), + $ ULP*ANORM, TWO*RTUNFL ) + ELSE + VU = WA1( N ) + MAX( HALF*( WA1( N )-WA1( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL DSTEBZ( 'V', 'E', N, VL, VU, IL, IU, ABSTOL, SD, SE, + $ M3, NSPLIT, WA3, IWORK( 1 ), IWORK( N+1 ), + $ RWORK, IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 19 ) = ULPINV + GO TO 280 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.NE.0 ) THEN + RESULT( 19 ) = ULPINV + GO TO 280 + END IF +* +* Do test 19 +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( N ) ), ABS( WA1( 1 ) ) ) + ELSE + TEMP3 = ZERO + END IF +* + RESULT( 19 ) = ( TEMP1+TEMP2 ) / MAX( UNFL, TEMP3*ULP ) +* +* Call ZSTEIN to compute eigenvectors corresponding to +* eigenvalues in WA1. (First call DSTEBZ again, to make sure +* it returns these eigenvalues in the correct order.) +* + NTEST = 21 + CALL DSTEBZ( 'A', 'B', N, VL, VU, IL, IU, ABSTOL, SD, SE, M, + $ NSPLIT, WA1, IWORK( 1 ), IWORK( N+1 ), RWORK, + $ IWORK( 2*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'DSTEBZ(A,B)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* + CALL ZSTEIN( N, SD, SE, M, WA1, IWORK( 1 ), IWORK( N+1 ), Z, + $ LDU, RWORK, IWORK( 2*N+1 ), IWORK( 3*N+1 ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEIN', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 20 ) = ULPINV + RESULT( 21 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do tests 20 and 21 +* + CALL ZSTT21( N, 0, SD, SE, WA1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 20 ) ) +* +* Call ZSTEDC(I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + INDE = 1 + INDRWK = INDE + N + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 22 + CALL ZSTEDC( 'I', N, D1, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEDC(I)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 22 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 22 and 23 +* + CALL ZSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 22 ) ) +* +* Call ZSTEDC(V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D1, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 24 + CALL ZSTEDC( 'V', N, D1, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEDC(V)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 24 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 24 and 25 +* + CALL ZSTT21( N, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, RWORK, + $ RESULT( 24 ) ) +* +* Call ZSTEDC(N) to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D2, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK( INDE ), 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 26 + CALL ZSTEDC( 'N', N, D2, RWORK( INDE ), Z, LDU, WORK, LWEDC, + $ RWORK( INDRWK ), LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEDC(N)', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 26 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 26 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 210 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 210 CONTINUE +* + RESULT( 26 ) = TEMP2 / MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Only test ZSTEMR if IEEE compliant +* + IF( ILAENV( 10, 'ZSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 .AND. + $ ILAENV( 11, 'ZSTEMR', 'VA', 1, 0, 0, 0 ).EQ.1 ) THEN +* +* Call ZSTEMR, do test 27 (relative eigenvalue accuracy) +* +* If S is positive definite and diagonally dominant, +* ask for all eigenvalues with high relative accuracy. +* + VL = ZERO + VU = ZERO + IL = 0 + IU = 0 + IF( JTYPE.EQ.21 .AND. CREL ) THEN + NTEST = 27 + ABSTOL = UNFL + UNFL + CALL ZSTEMR( 'V', 'A', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(V,A,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 27 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 27 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP*( ONE+EIGHT*HALF**2 ) / + $ ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 220 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D4( J )-WR( N-J+1 ) ) / + $ ( ABSTOL+ABS( D4( J ) ) ) ) + 220 CONTINUE +* + RESULT( 27 ) = TEMP1 / TEMP2 +* + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF +* + IF( CRANGE ) THEN + NTEST = 28 + ABSTOL = UNFL + UNFL + CALL ZSTEMR( 'V', 'I', N, SD, SE, VL, VU, IL, IU, + $ M, WR, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK, LRWORK, IWORK( 2*N+1 ), + $ LWORK-2*N, IINFO ) +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(V,I,rel)', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 28 ) = ULPINV + GO TO 270 + END IF + END IF +* +* +* Do test 28 +* + TEMP2 = TWO*( TWO*N-ONE )*ULP* + $ ( ONE+EIGHT*HALF**2 ) / ( ONE-HALF )**4 +* + TEMP1 = ZERO + DO 230 J = IL, IU + TEMP1 = MAX( TEMP1, ABS( WR( J-IL+1 )-D4( N-J+ + $ 1 ) ) / ( ABSTOL+ABS( WR( J-IL+1 ) ) ) ) + 230 CONTINUE +* + RESULT( 28 ) = TEMP1 / TEMP2 + ELSE + RESULT( 28 ) = ZERO + END IF + ELSE + RESULT( 27 ) = ZERO + RESULT( 28 ) = ZERO + END IF +* +* Call ZSTEMR(V,I) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + IF( CRANGE ) THEN + NTEST = 29 + IL = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IU = 1 + ( N-1 )*INT( DLARND( 1, ISEED2 ) ) + IF( IU.LT.IL ) THEN + ITEMP = IU + IU = IL + IL = ITEMP + END IF + CALL ZSTEMR( 'V', 'I', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(V,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 29 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 29 and 30 +* +* +* Call ZSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 31 + CALL ZSTEMR( 'N', 'I', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(N,I)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 31 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 31 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 240 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 240 CONTINUE +* + RESULT( 31 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* +* Call ZSTEMR(V,V) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) + CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDU ) +* + NTEST = 32 +* + IF( N.GT.0 ) THEN + IF( IL.NE.1 ) THEN + VL = D2( IL ) - MAX( HALF* + $ ( D2( IL )-D2( IL-1 ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VL = D2( 1 ) - MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D2( IU ) + MAX( HALF* + $ ( D2( IU+1 )-D2( IU ) ), ULP*ANORM, + $ TWO*RTUNFL ) + ELSE + VU = D2( N ) + MAX( HALF*( D2( N )-D2( 1 ) ), + $ ULP*ANORM, TWO*RTUNFL ) + END IF + ELSE + VL = ZERO + VU = ONE + END IF +* + CALL ZSTEMR( 'V', 'V', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, M, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(V,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 32 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 32 and 33 +* + CALL ZSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, + $ M, RWORK, RESULT( 32 ) ) +* +* Call ZSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 34 + CALL ZSTEMR( 'N', 'V', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(N,V)', IINFO, + $ N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 34 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 250 J = 1, IU - IL + 1 + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 250 CONTINUE +* + RESULT( 34 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + ELSE + RESULT( 29 ) = ZERO + RESULT( 30 ) = ZERO + RESULT( 31 ) = ZERO + RESULT( 32 ) = ZERO + RESULT( 33 ) = ZERO + RESULT( 34 ) = ZERO + END IF +* +* +* Call ZSTEMR(V,A) to compute D1 and Z, do tests. +* +* Compute D1 and Z +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 35 +* + CALL ZSTEMR( 'V', 'A', N, D5, RWORK, VL, VU, IL, IU, + $ M, D1, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(V,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 35 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Tests 35 and 36 +* + CALL ZSTT22( N, M, 0, SD, SE, D1, DUMMA, Z, LDU, WORK, M, + $ RWORK, RESULT( 35 ) ) +* +* Call ZSTEMR to compute D2, do tests. +* +* Compute D2 +* + CALL DCOPY( N, SD, 1, D5, 1 ) + IF( N.GT.0 ) + $ CALL DCOPY( N-1, SE, 1, RWORK, 1 ) +* + NTEST = 37 + CALL ZSTEMR( 'N', 'A', N, D5, RWORK, VL, VU, IL, IU, + $ M, D2, Z, LDU, N, IWORK( 1 ), TRYRAC, + $ RWORK( N+1 ), LRWORK-N, IWORK( 2*N+1 ), + $ LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZSTEMR(N,A)', IINFO, N, + $ JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( 37 ) = ULPINV + GO TO 280 + END IF + END IF +* +* Do Test 34 +* + TEMP1 = ZERO + TEMP2 = ZERO +* + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D2( J ) ) ) + 260 CONTINUE +* + RESULT( 37 ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) + END IF + 270 CONTINUE + 280 CONTINUE + NTESTT = NTESTT + NTEST +* +* End of Loop -- Check for RESULT(j) > THRESH +* +* +* Print out tests which fail. +* + DO 290 JR = 1, NTEST + IF( RESULT( JR ).GE.THRESH ) THEN +* +* If this is the first test to fail, +* print a header to the data file. +* + IF( NERRS.EQ.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZST' + WRITE( NOUNIT, FMT = 9997 ) + WRITE( NOUNIT, FMT = 9996 ) + WRITE( NOUNIT, FMT = 9995 )'Hermitian' + WRITE( NOUNIT, FMT = 9994 ) +* +* Tests performed +* + WRITE( NOUNIT, FMT = 9987 ) + END IF + NERRS = NERRS + 1 + IF( RESULT( JR ).LT.10000.0D0 ) THEN + WRITE( NOUNIT, FMT = 9989 )N, JTYPE, IOLDSD, JR, + $ RESULT( JR ) + ELSE + WRITE( NOUNIT, FMT = 9988 )N, JTYPE, IOLDSD, JR, + $ RESULT( JR ) + END IF + END IF + 290 CONTINUE + 300 CONTINUE + 310 CONTINUE +* +* Summary +* + CALL DLASUM( 'ZST', NOUNIT, NERRS, NTESTT ) + RETURN +* + 9999 FORMAT( ' ZCHKST2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* + 9998 FORMAT( / 1X, A3, ' -- Complex Hermitian eigenvalue problem' ) + 9997 FORMAT( ' Matrix types (see ZCHKST2STG for details): ' ) +* + 9996 FORMAT( / ' Special Matrices:', + $ / ' 1=Zero matrix. ', + $ ' 5=Diagonal: clustered entries.', + $ / ' 2=Identity matrix. ', + $ ' 6=Diagonal: large, evenly spaced.', + $ / ' 3=Diagonal: evenly spaced entries. ', + $ ' 7=Diagonal: small, evenly spaced.', + $ / ' 4=Diagonal: geometr. spaced entries.' ) + 9995 FORMAT( ' Dense ', A, ' Matrices:', + $ / ' 8=Evenly spaced eigenvals. ', + $ ' 12=Small, evenly spaced eigenvals.', + $ / ' 9=Geometrically spaced eigenvals. ', + $ ' 13=Matrix with random O(1) entries.', + $ / ' 10=Clustered eigenvalues. ', + $ ' 14=Matrix with large random entries.', + $ / ' 11=Large, evenly spaced eigenvals. ', + $ ' 15=Matrix with small random entries.' ) + 9994 FORMAT( ' 16=Positive definite, evenly spaced eigenvalues', + $ / ' 17=Positive definite, geometrically spaced eigenvlaues', + $ / ' 18=Positive definite, clustered eigenvalues', + $ / ' 19=Positive definite, small evenly spaced eigenvalues', + $ / ' 20=Positive definite, large evenly spaced eigenvalues', + $ / ' 21=Diagonally dominant tridiagonal, geometrically', + $ ' spaced eigenvalues' ) +* + 9989 FORMAT( ' Matrix order=', I5, ', type=', I2, ', seed=', + $ 4( I4, ',' ), ' result ', I3, ' is', 0P, F8.2 ) + 9988 FORMAT( ' Matrix order=', I5, ', type=', I2, ', seed=', + $ 4( I4, ',' ), ' result ', I3, ' is', 1P, D10.3 ) +* + 9987 FORMAT( / 'Test performed: see ZCHKST2STG for details.', / ) +* End of ZCHKST2STG +* + END diff --git a/TESTING/EIG/zdrvsg2stg.f b/TESTING/EIG/zdrvsg2stg.f new file mode 100644 index 00000000..f75ce60c --- /dev/null +++ b/TESTING/EIG/zdrvsg2stg.f @@ -0,0 +1,1384 @@ +*> \brief \b ZDRVSG2STG +* +* @precisions fortran z -> c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, +* BB, AP, BP, WORK, NWORK, RWORK, LRWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDZ, LIWORK, LRWORK, NOUNIT, +* $ NSIZES, NTYPES, NWORK +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION D( * ), RESULT( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AB( LDA, * ), AP( * ), +* $ B( LDB, * ), BB( LDB, * ), BP( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZDRVSG2STG checks the complex Hermitian generalized eigenproblem +*> drivers. +*> +*> ZHEGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem. +*> +*> ZHEGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem using a divide and conquer algorithm. +*> +*> ZHEGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem. +*> +*> ZHPGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage. +*> +*> ZHPGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage using a divide and +*> conquer algorithm. +*> +*> ZHPGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite generalized +*> eigenproblem in packed storage. +*> +*> ZHBGV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem. +*> +*> ZHBGVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem using a divide and conquer +*> algorithm. +*> +*> ZHBGVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian-definite banded +*> generalized eigenproblem. +*> +*> When ZDRVSG2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix A of the given type will be +*> generated; a random well-conditioned matrix B is also generated +*> and the pair (A,B) is used to test the drivers. +*> +*> For each pair (A,B), the following tests are performed: +*> +*> (1) ZHEGV with ITYPE = 1 and UPLO ='U': +*> +*> | A Z - B Z D | / ( |A| |Z| n ulp ) +*> | D - D2 | / ( |D| ulp ) where D is computed by +*> ZHEGV and D2 is computed by +*> ZHEGV_2STAGE. This test is +*> only performed for DSYGV +*> +*> (2) as (1) but calling ZHPGV +*> (3) as (1) but calling ZHBGV +*> (4) as (1) but with UPLO = 'L' +*> (5) as (4) but calling ZHPGV +*> (6) as (4) but calling ZHBGV +*> +*> (7) ZHEGV with ITYPE = 2 and UPLO ='U': +*> +*> | A B Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (8) as (7) but calling ZHPGV +*> (9) as (7) but with UPLO = 'L' +*> (10) as (9) but calling ZHPGV +*> +*> (11) ZHEGV with ITYPE = 3 and UPLO ='U': +*> +*> | B A Z - Z D | / ( |A| |Z| n ulp ) +*> +*> (12) as (11) but calling ZHPGV +*> (13) as (11) but with UPLO = 'L' +*> (14) as (13) but calling ZHPGV +*> +*> ZHEGVD, ZHPGVD and ZHBGVD performed the same 14 tests. +*> +*> ZHEGVX, ZHPGVX and ZHBGVX performed the above 14 tests with +*> the parameter RANGE = 'A', 'N' and 'I', respectively. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> This type is used for the matrix A which has half-bandwidth KA. +*> B is generated as a well-conditioned positive definite matrix +*> with half-bandwidth KB (<= KA). +*> Currently, the list of possible types for A is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Hermitian matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> +*> (16) Same as (8), but with KA = 1 and KB = 1 +*> (17) Same as (8), but with KA = 2 and KB = 1 +*> (18) Same as (8), but with KA = 2 and KB = 2 +*> (19) Same as (8), but with KA = 3 and KB = 1 +*> (20) Same as (8), but with KA = 3 and KB = 2 +*> (21) Same as (8), but with KA = 3 and KB = 3 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> ZDRVSG2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, ZDRVSG2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to ZDRVSG2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A COMPLEX*16 array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> B COMPLEX*16 array, dimension (LDB , max(NN)) +*> Used to hold the Hermitian positive definite matrix for +*> the generailzed problem. +*> On exit, B contains the last matrix actually +*> used. +*> Modified. +*> +*> LDB INTEGER +*> The leading dimension of B. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A. On exit, the eigenvalues in D +*> correspond with the matrix in A. +*> Modified. +*> +*> Z COMPLEX*16 array, dimension (LDZ, max(NN)) +*> The matrix of eigenvectors. +*> Modified. +*> +*> LDZ INTEGER +*> The leading dimension of ZZ. It must be at least 1 and +*> at least max( NN ). +*> Not modified. +*> +*> AB COMPLEX*16 array, dimension (LDA, max(NN)) +*> Workspace. +*> Modified. +*> +*> BB COMPLEX*16 array, dimension (LDB, max(NN)) +*> Workspace. +*> Modified. +*> +*> AP COMPLEX*16 array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> BP COMPLEX*16 array, dimension (max(NN)**2) +*> Workspace. +*> Modified. +*> +*> WORK COMPLEX*16 array, dimension (NWORK) +*> Workspace. +*> Modified. +*> +*> NWORK INTEGER +*> The number of entries in WORK. This must be at least +*> 2*N + N**2 where N = max( NN(j), 2 ). +*> Not modified. +*> +*> RWORK DOUBLE PRECISION array, dimension (LRWORK) +*> Workspace. +*> Modified. +*> +*> LRWORK INTEGER +*> The number of entries in RWORK. This must be at least +*> max( 7*N, 1 + 4*N + 2*N*lg(N) + 3*N**2 ) where +*> N = max( NN(j) ) and lg( N ) = smallest integer k such +*> that 2**k >= N . +*> Not modified. +*> +*> IWORK INTEGER array, dimension (LIWORK)) +*> Workspace. +*> Modified. +*> +*> LIWORK INTEGER +*> The number of entries in IWORK. This must be at least +*> 2 + 5*max( NN(j) ). +*> Not modified. +*> +*> RESULT DOUBLE PRECISION array, dimension (70) +*> The values computed by the 70 tests described above. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDZ < 1 or LDZ < NMAX. +*> -21: NWORK too small. +*> -23: LRWORK too small. +*> -25: LIWORK too small. +*> If ZLATMR, CLATMS, ZHEGV, ZHPGV, ZHBGV, CHEGVD, CHPGVD, +*> ZHPGVD, ZHEGVX, CHPGVX, ZHBGVX returns an error code, +*> the absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests that have been run +*> on this matrix. +*> NTESTT The total number of tests for this call. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by DLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZDRVSG2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, B, LDB, D, D2, Z, LDZ, AB, + $ BB, AP, BP, WORK, NWORK, RWORK, LRWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDZ, LIWORK, LRWORK, NOUNIT, + $ NSIZES, NTYPES, NWORK + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION D( * ), D2( * ), RESULT( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AB( LDA, * ), AP( * ), + $ B( LDB, * ), BB( LDB, * ), BP( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TEN + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 10.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 21 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IBTYPE, IBUPLO, IINFO, IJ, IL, IMODE, ITEMP, + $ ITYPE, IU, J, JCOL, JSIZE, JTYPE, KA, KA9, KB, + $ KB9, M, MTYPES, N, NERRS, NMATS, NMAX, NTEST, + $ NTESTT + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, ULP, ULPINV, UNFL, VL, VU, TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLARND + EXTERNAL LSAME, DLAMCH, DLARND +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLAFTS, DLASUM, XERBLA, ZHBGV, ZHBGVD, + $ ZHBGVX, ZHEGV, ZHEGVD, ZHEGVX, ZHPGV, ZHPGVD, + $ ZHPGVX, ZLACPY, ZLASET, ZLATMR, ZLATMS, ZSGT01, + $ ZHEGV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 6*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 6*1 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 6*4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 0 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LE.1 .OR. LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDZ.LE.1 .OR. LDZ.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.NWORK ) THEN + INFO = -21 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.LRWORK ) THEN + INFO = -23 + ELSE IF( 2*MAX( NMAX, 2 )**2.GT.LIWORK ) THEN + INFO = -25 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZDRVSG2STG', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = DLAMCH( 'Overflow' ) + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + 20 CONTINUE +* +* Loop over sizes, types +* + NERRS = 0 + NMATS = 0 +* + DO 650 JSIZE = 1, NSIZES + N = NN( JSIZE ) + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + KA9 = 0 + KB9 = 0 + DO 640 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 640 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, w/ eigenvalues +* =5 random log hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random hermitian +* =9 banded, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 90 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* + IF( ITYPE.EQ.1 ) THEN +* +* Zero +* + KA = 0 + KB = 0 + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + KA = 0 + KB = 0 + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + KA = 0 + KB = 0 + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + KA = MAX( 0, N-1 ) + KB = KA + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + KA = 0 + KB = 0 + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + KA = MAX( 0, N-1 ) + KB = KA + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Hermitian banded, eigenvalues specified +* +* The following values are used for the half-bandwidths: +* +* ka = 1 kb = 1 +* ka = 2 kb = 1 +* ka = 2 kb = 2 +* ka = 3 kb = 1 +* ka = 3 kb = 2 +* ka = 3 kb = 3 +* + KB9 = KB9 + 1 + IF( KB9.GT.KA9 ) THEN + KA9 = KA9 + 1 + KB9 = 1 + END IF + KA = MAX( 0, MIN( N-1, KA9 ) ) + KB = MAX( 0, MIN( N-1, KB9 ) ) + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, KA, KA, 'N', A, LDA, WORK, IINFO ) +* + ELSE +* + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 90 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* 3) Call ZHEGV, ZHPGV, ZHBGV, CHEGVD, CHPGVD, CHBGVD, +* ZHEGVX, ZHPGVX and ZHBGVX, do tests. +* +* loop over the three generalized problems +* IBTYPE = 1: A*x = (lambda)*B*x +* IBTYPE = 2: A*B*x = (lambda)*x +* IBTYPE = 3: B*A*x = (lambda)*x +* + DO 630 IBTYPE = 1, 3 +* +* loop over the setting UPLO +* + DO 620 IBUPLO = 1, 2 + IF( IBUPLO.EQ.1 ) + $ UPLO = 'U' + IF( IBUPLO.EQ.2 ) + $ UPLO = 'L' +* +* Generate random well-conditioned positive definite +* matrix B, of bandwidth not greater than that of A. +* + CALL ZLATMS( N, N, 'U', ISEED, 'P', RWORK, 5, TEN, + $ ONE, KB, KB, UPLO, B, LDB, WORK( N+1 ), + $ IINFO ) +* +* Test ZHEGV +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL ZHEGV( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test ZHEGV_2STAGE +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL ZHEGV_2STAGE( IBTYPE, 'N', UPLO, N, Z, LDZ, + $ BB, LDB, D2, WORK, NWORK, RWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEGV_2STAGE(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* +C CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, +C $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Do Tests | D1 - D2 | / ( |D1| ulp ) +* D1 computed using the standard 1-stage reduction as reference +* D2 computed using the 2-stage reduction +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 151 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D( J ) ), + $ ABS( D2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D( J )-D2( J ) ) ) + 151 CONTINUE +* + RESULT( NTEST ) = TEMP2 / + $ MAX( UNFL, ULP*MAX( TEMP1, TEMP2 ) ) +* +* Test ZHEGVD +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, Z, LDZ ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL ZHEGVD( IBTYPE, 'V', UPLO, N, Z, LDZ, BB, LDB, D, + $ WORK, NWORK, RWORK, LRWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test ZHEGVX +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL ZHEGVX( IBTYPE, 'V', 'A', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* +* since we do not know the exact eigenvalues of this +* eigenpair, we just set VL and VU as constants. +* It is quite possible that there are no eigenvalues +* in this interval. +* + VL = ZERO + VU = ANORM + CALL ZHEGVX( IBTYPE, 'V', 'V', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEGVX(V,V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* + CALL ZLACPY( ' ', N, N, A, LDA, AB, LDA ) + CALL ZLACPY( UPLO, N, N, B, LDB, BB, LDB ) +* + CALL ZHEGVX( IBTYPE, 'V', 'I', UPLO, N, AB, LDA, BB, + $ LDB, VL, VU, IL, IU, ABSTOL, M, D, Z, + $ LDZ, WORK, NWORK, RWORK, IWORK( N+1 ), + $ IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEGVX(V,I,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 100 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + 100 CONTINUE +* +* Test ZHPGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 120 J = 1, N + DO 110 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 110 CONTINUE + 120 CONTINUE + ELSE + IJ = 1 + DO 140 J = 1, N + DO 130 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 130 CONTINUE + 140 CONTINUE + END IF +* + CALL ZHPGV( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPGV(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test ZHPGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 160 J = 1, N + DO 150 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 150 CONTINUE + 160 CONTINUE + ELSE + IJ = 1 + DO 180 J = 1, N + DO 170 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 170 CONTINUE + 180 CONTINUE + END IF +* + CALL ZHPGVD( IBTYPE, 'V', UPLO, N, AP, BP, D, Z, LDZ, + $ WORK, NWORK, RWORK, LRWORK, IWORK, + $ LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPGVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test ZHPGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 200 J = 1, N + DO 190 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 190 CONTINUE + 200 CONTINUE + ELSE + IJ = 1 + DO 220 J = 1, N + DO 210 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 210 CONTINUE + 220 CONTINUE + END IF +* + CALL ZHPGVX( IBTYPE, 'V', 'A', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,A' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 240 J = 1, N + DO 230 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 230 CONTINUE + 240 CONTINUE + ELSE + IJ = 1 + DO 260 J = 1, N + DO 250 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 250 CONTINUE + 260 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL ZHPGVX( IBTYPE, 'V', 'V', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,V' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into packed storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + IJ = 1 + DO 280 J = 1, N + DO 270 I = 1, J + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 270 CONTINUE + 280 CONTINUE + ELSE + IJ = 1 + DO 300 J = 1, N + DO 290 I = J, N + AP( IJ ) = A( I, J ) + BP( IJ ) = B( I, J ) + IJ = IJ + 1 + 290 CONTINUE + 300 CONTINUE + END IF +* + CALL ZHPGVX( IBTYPE, 'V', 'I', UPLO, N, AP, BP, VL, + $ VU, IL, IU, ABSTOL, M, D, Z, LDZ, WORK, + $ RWORK, IWORK( N+1 ), IWORK, INFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPGVX(V,I' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 310 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + 310 CONTINUE +* + IF( IBTYPE.EQ.1 ) THEN +* +* TEST ZHBGV +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 340 J = 1, N + DO 320 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 320 CONTINUE + DO 330 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 330 CONTINUE + 340 CONTINUE + ELSE + DO 370 J = 1, N + DO 350 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 350 CONTINUE + DO 360 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 360 CONTINUE + 370 CONTINUE + END IF +* + CALL ZHBGV( 'V', UPLO, N, KA, KB, AB, LDA, BB, LDB, + $ D, Z, LDZ, WORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBGV(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* TEST ZHBGVD +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 400 J = 1, N + DO 380 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 380 CONTINUE + DO 390 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 390 CONTINUE + 400 CONTINUE + ELSE + DO 430 J = 1, N + DO 410 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 410 CONTINUE + DO 420 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 420 CONTINUE + 430 CONTINUE + END IF +* + CALL ZHBGVD( 'V', UPLO, N, KA, KB, AB, LDA, BB, + $ LDB, D, Z, LDZ, WORK, NWORK, RWORK, + $ LRWORK, IWORK, LIWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBGVD(V,' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* +* Test ZHBGVX +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 460 J = 1, N + DO 440 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 440 CONTINUE + DO 450 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 450 CONTINUE + 460 CONTINUE + ELSE + DO 490 J = 1, N + DO 470 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 470 CONTINUE + DO 480 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 480 CONTINUE + 490 CONTINUE + END IF +* + CALL ZHBGVX( 'V', 'A', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBGVX(V,A' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, N, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 520 J = 1, N + DO 500 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 500 CONTINUE + DO 510 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 510 CONTINUE + 520 CONTINUE + ELSE + DO 550 J = 1, N + DO 530 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 530 CONTINUE + DO 540 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 540 CONTINUE + 550 CONTINUE + END IF +* + VL = ZERO + VU = ANORM + CALL ZHBGVX( 'V', 'V', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBGVX(V,V' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 1 +* +* Copy the matrices into band storage. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 580 J = 1, N + DO 560 I = MAX( 1, J-KA ), J + AB( KA+1+I-J, J ) = A( I, J ) + 560 CONTINUE + DO 570 I = MAX( 1, J-KB ), J + BB( KB+1+I-J, J ) = B( I, J ) + 570 CONTINUE + 580 CONTINUE + ELSE + DO 610 J = 1, N + DO 590 I = J, MIN( N, J+KA ) + AB( 1+I-J, J ) = A( I, J ) + 590 CONTINUE + DO 600 I = J, MIN( N, J+KB ) + BB( 1+I-J, J ) = B( I, J ) + 600 CONTINUE + 610 CONTINUE + END IF +* + CALL ZHBGVX( 'V', 'I', UPLO, N, KA, KB, AB, LDA, + $ BB, LDB, BP, MAX( 1, N ), VL, VU, IL, + $ IU, ABSTOL, M, D, Z, LDZ, WORK, RWORK, + $ IWORK( N+1 ), IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBGVX(V,I' // + $ UPLO // ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 620 + END IF + END IF +* +* Do Test +* + CALL ZSGT01( IBTYPE, UPLO, N, M, A, LDA, B, LDB, Z, + $ LDZ, D, WORK, RWORK, RESULT( NTEST ) ) +* + END IF +* + 620 CONTINUE + 630 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL DLAFTS( 'ZSG', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) + 640 CONTINUE + 650 CONTINUE +* +* Summary +* + CALL DLASUM( 'ZSG', NOUNIT, NERRS, NTESTT ) +* + RETURN +* + 9999 FORMAT( ' ZDRVSG2STG: ', A, ' returned INFO=', I6, '.', / 9X, + $ 'N=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) +* +* End of ZDRVSG2STG +* + END diff --git a/TESTING/EIG/zdrvst2stg.f b/TESTING/EIG/zdrvst2stg.f new file mode 100644 index 00000000..0b33f52d --- /dev/null +++ b/TESTING/EIG/zdrvst2stg.f @@ -0,0 +1,2118 @@ +*> \brief \b ZDRVST2STG +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, +* NOUNIT, A, LDA, D1, D2, D3, WA1, WA2, WA3, U, +* LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, +* IWORK, LIWORK, RESULT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, +* $ NSIZES, NTYPES +* DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. +* LOGICAL DOTYPE( * ) +* INTEGER ISEED( 4 ), IWORK( * ), NN( * ) +* DOUBLE PRECISION D1( * ), D2( * ), D3( * ), RESULT( * ), +* $ RWORK( * ), WA1( * ), WA2( * ), WA3( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), U( LDU, * ), +* $ V( LDU, * ), WORK( * ), Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZDRVST2STG checks the Hermitian eigenvalue problem drivers. +*> +*> ZHEEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix, +*> using a divide-and-conquer algorithm. +*> +*> ZHEEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix. +*> +*> ZHEEVR computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix +*> using the Relatively Robust Representation where it can. +*> +*> ZHPEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage, using a divide-and-conquer algorithm. +*> +*> ZHPEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage. +*> +*> ZHBEVD computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix, +*> using a divide-and-conquer algorithm. +*> +*> ZHBEVX computes selected eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix. +*> +*> ZHEEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix. +*> +*> ZHPEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian matrix in packed +*> storage. +*> +*> ZHBEV computes all eigenvalues and, optionally, +*> eigenvectors of a complex Hermitian band matrix. +*> +*> When ZDRVST2STG is called, a number of matrix "sizes" ("n's") and a +*> number of matrix "types" are specified. For each size ("n") +*> and each type of matrix, one matrix will be generated and used +*> to test the appropriate drivers. For each matrix and each +*> driver routine called, the following tests will be performed: +*> +*> (1) | A - Z D Z' | / ( |A| n ulp ) +*> +*> (2) | I - Z Z' | / ( n ulp ) +*> +*> (3) | D1 - D2 | / ( |D1| ulp ) +*> +*> where Z is the matrix of eigenvectors returned when the +*> eigenvector option is given and D1 and D2 are the eigenvalues +*> returned with and without the eigenvector option. +*> +*> The "sizes" are specified by an array NN(1:NSIZES); the value of +*> each element NN(j) specifies one size. +*> The "types" are specified by a logical array DOTYPE( 1:NTYPES ); +*> if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. +*> Currently, the list of possible types is: +*> +*> (1) The zero matrix. +*> (2) The identity matrix. +*> +*> (3) A diagonal matrix with evenly spaced entries +*> 1, ..., ULP and random signs. +*> (ULP = (first number larger than 1) - 1 ) +*> (4) A diagonal matrix with geometrically spaced entries +*> 1, ..., ULP and random signs. +*> (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP +*> and random signs. +*> +*> (6) Same as (4), but multiplied by SQRT( overflow threshold ) +*> (7) Same as (4), but multiplied by SQRT( underflow threshold ) +*> +*> (8) A matrix of the form U* D U, where U is unitary and +*> D has evenly spaced entries 1, ..., ULP with random signs +*> on the diagonal. +*> +*> (9) A matrix of the form U* D U, where U is unitary and +*> D has geometrically spaced entries 1, ..., ULP with random +*> signs on the diagonal. +*> +*> (10) A matrix of the form U* D U, where U is unitary and +*> D has "clustered" entries 1, ULP,..., ULP with random +*> signs on the diagonal. +*> +*> (11) Same as (8), but multiplied by SQRT( overflow threshold ) +*> (12) Same as (8), but multiplied by SQRT( underflow threshold ) +*> +*> (13) Symmetric matrix with random entries chosen from (-1,1). +*> (14) Same as (13), but multiplied by SQRT( overflow threshold ) +*> (15) Same as (13), but multiplied by SQRT( underflow threshold ) +*> (16) A band matrix with half bandwidth randomly chosen between +*> 0 and N-1, with evenly spaced eigenvalues 1, ..., ULP +*> with random signs. +*> (17) Same as (16), but multiplied by SQRT( overflow threshold ) +*> (18) Same as (16), but multiplied by SQRT( underflow threshold ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> NSIZES INTEGER +*> The number of sizes of matrices to use. If it is zero, +*> ZDRVST2STG does nothing. It must be at least zero. +*> Not modified. +*> +*> NN INTEGER array, dimension (NSIZES) +*> An array containing the sizes to be used for the matrices. +*> Zero values will be skipped. The values must be at least +*> zero. +*> Not modified. +*> +*> NTYPES INTEGER +*> The number of elements in DOTYPE. If it is zero, ZDRVST2STG +*> does nothing. It must be at least zero. If it is MAXTYP+1 +*> and NSIZES is 1, then an additional type, MAXTYP+1 is +*> defined, which is to use whatever matrix is in A. This +*> is only useful if DOTYPE(1:MAXTYP) is .FALSE. and +*> DOTYPE(MAXTYP+1) is .TRUE. . +*> Not modified. +*> +*> DOTYPE LOGICAL array, dimension (NTYPES) +*> If DOTYPE(j) is .TRUE., then for each size in NN a +*> matrix of that size and of type j will be generated. +*> If NTYPES is smaller than the maximum number of types +*> defined (PARAMETER MAXTYP), then types NTYPES+1 through +*> MAXTYP will not be generated. If NTYPES is larger +*> than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) +*> will be ignored. +*> Not modified. +*> +*> ISEED INTEGER array, dimension (4) +*> On entry ISEED specifies the seed of the random number +*> generator. The array elements should be between 0 and 4095; +*> if not they will be reduced mod 4096. Also, ISEED(4) must +*> be odd. The random number generator uses a linear +*> congruential sequence limited to small integers, and so +*> should produce machine independent random numbers. The +*> values of ISEED are changed on exit, and can be used in the +*> next call to ZDRVST2STG to continue the same random number +*> sequence. +*> Modified. +*> +*> THRESH DOUBLE PRECISION +*> A test will count as "failed" if the "error", computed as +*> described above, exceeds THRESH. Note that the error +*> is scaled to be O(1), so THRESH should be a reasonably +*> small multiple of 1, e.g., 10 or 100. In particular, +*> it should not depend on the precision (single vs. double) +*> or the size of the matrix. It must be at least zero. +*> Not modified. +*> +*> NOUNIT INTEGER +*> The FORTRAN unit number for printing out error messages +*> (e.g., if a routine returns IINFO not equal to 0.) +*> Not modified. +*> +*> A COMPLEX*16 array, dimension (LDA , max(NN)) +*> Used to hold the matrix whose eigenvalues are to be +*> computed. On exit, A contains the last matrix actually +*> used. +*> Modified. +*> +*> LDA INTEGER +*> The leading dimension of A. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> D1 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by ZSTEQR simlutaneously +*> with Z. On exit, the eigenvalues in D1 correspond with the +*> matrix in A. +*> Modified. +*> +*> D2 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by ZSTEQR if Z is not +*> computed. On exit, the eigenvalues in D2 correspond with +*> the matrix in A. +*> Modified. +*> +*> D3 DOUBLE PRECISION array, dimension (max(NN)) +*> The eigenvalues of A, as computed by DSTERF. On exit, the +*> eigenvalues in D3 correspond with the matrix in A. +*> Modified. +*> +*> WA1 DOUBLE PRECISION array, dimension +*> +*> WA2 DOUBLE PRECISION array, dimension +*> +*> WA3 DOUBLE PRECISION array, dimension +*> +*> U COMPLEX*16 array, dimension (LDU, max(NN)) +*> The unitary matrix computed by ZHETRD + ZUNGC3. +*> Modified. +*> +*> LDU INTEGER +*> The leading dimension of U, Z, and V. It must be at +*> least 1 and at least max( NN ). +*> Not modified. +*> +*> V COMPLEX*16 array, dimension (LDU, max(NN)) +*> The Housholder vectors computed by ZHETRD in reducing A to +*> tridiagonal form. +*> Modified. +*> +*> TAU COMPLEX*16 array, dimension (max(NN)) +*> The Householder factors computed by ZHETRD in reducing A +*> to tridiagonal form. +*> Modified. +*> +*> Z COMPLEX*16 array, dimension (LDU, max(NN)) +*> The unitary matrix of eigenvectors computed by ZHEEVD, +*> ZHEEVX, ZHPEVD, CHPEVX, ZHBEVD, and CHBEVX. +*> Modified. +*> +*> WORK - COMPLEX*16 array of dimension ( LWORK ) +*> Workspace. +*> Modified. +*> +*> LWORK - INTEGER +*> The number of entries in WORK. This must be at least +*> 2*max( NN(j), 2 )**2. +*> Not modified. +*> +*> RWORK DOUBLE PRECISION array, dimension (3*max(NN)) +*> Workspace. +*> Modified. +*> +*> LRWORK - INTEGER +*> The number of entries in RWORK. +*> +*> IWORK INTEGER array, dimension (6*max(NN)) +*> Workspace. +*> Modified. +*> +*> LIWORK - INTEGER +*> The number of entries in IWORK. +*> +*> RESULT DOUBLE PRECISION array, dimension (??) +*> The values computed by the tests described above. +*> The values are currently limited to 1/ulp, to avoid +*> overflow. +*> Modified. +*> +*> INFO INTEGER +*> If 0, then everything ran OK. +*> -1: NSIZES < 0 +*> -2: Some NN(j) < 0 +*> -3: NTYPES < 0 +*> -5: THRESH < 0 +*> -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). +*> -16: LDU < 1 or LDU < NMAX. +*> -21: LWORK too small. +*> If DLATMR, SLATMS, ZHETRD, DORGC3, ZSTEQR, DSTERF, +*> or DORMC2 returns an error code, the +*> absolute value of it is returned. +*> Modified. +*> +*>----------------------------------------------------------------------- +*> +*> Some Local Variables and Parameters: +*> ---- ----- --------- --- ---------- +*> ZERO, ONE Real 0 and 1. +*> MAXTYP The number of types defined. +*> NTEST The number of tests performed, or which can +*> be performed so far, for the current matrix. +*> NTESTT The total number of tests performed so far. +*> NMAX Largest value in NN. +*> NMATS The number of matrices generated so far. +*> NERRS The number of tests which have exceeded THRESH +*> so far (computed by DLAFTS). +*> COND, IMODE Values to be passed to the matrix generators. +*> ANORM Norm of A; passed to matrix generators. +*> +*> OVFL, UNFL Overflow and underflow thresholds. +*> ULP, ULPINV Finest relative precision and its inverse. +*> RTOVFL, RTUNFL Square roots of the previous 2 values. +*> The following four arrays decode JTYPE: +*> KTYPE(j) The general type (1-10) for type "j". +*> KMODE(j) The MODE value to be passed to the matrix +*> generator for type "j". +*> KMAGN(j) The order of magnitude ( O(1), +*> O(overflow^(1/2) ), O(underflow^(1/2) ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZDRVST2STG( NSIZES, NN, NTYPES, DOTYPE, ISEED, THRESH, + $ NOUNIT, A, LDA, D1, D2, D3, WA1, WA2, WA3, U, + $ LDU, V, TAU, Z, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, RESULT, INFO ) +* +* -- LAPACK test routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDU, LIWORK, LRWORK, LWORK, NOUNIT, + $ NSIZES, NTYPES + DOUBLE PRECISION THRESH +* .. +* .. Array Arguments .. + LOGICAL DOTYPE( * ) + INTEGER ISEED( 4 ), IWORK( * ), NN( * ) + DOUBLE PRECISION D1( * ), D2( * ), D3( * ), RESULT( * ), + $ RWORK( * ), WA1( * ), WA2( * ), WA3( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), U( LDU, * ), + $ V( LDU, * ), WORK( * ), Z( LDU, * ) +* .. +* +* ===================================================================== +* +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, TEN + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ TEN = 10.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + INTEGER MAXTYP + PARAMETER ( MAXTYP = 18 ) +* .. +* .. Local Scalars .. + LOGICAL BADNN + CHARACTER UPLO + INTEGER I, IDIAG, IHBW, IINFO, IL, IMODE, INDWRK, INDX, + $ IROW, ITEMP, ITYPE, IU, IUPLO, J, J1, J2, JCOL, + $ JSIZE, JTYPE, KD, LGN, LIWEDC, LRWEDC, LWEDC, + $ M, M2, M3, MTYPES, N, NERRS, NMATS, NMAX, + $ NTEST, NTESTT + DOUBLE PRECISION ABSTOL, ANINV, ANORM, COND, OVFL, RTOVFL, + $ RTUNFL, TEMP1, TEMP2, TEMP3, ULP, ULPINV, UNFL, + $ VL, VU +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ), IOLDSD( 4 ), ISEED2( 4 ), + $ ISEED3( 4 ), KMAGN( MAXTYP ), KMODE( MAXTYP ), + $ KTYPE( MAXTYP ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLARND, DSXT1 + EXTERNAL DLAMCH, DLARND, DSXT1 +* .. +* .. External Subroutines .. + EXTERNAL ALASVM, DLABAD, DLAFTS, XERBLA, ZHBEV, ZHBEVD, + $ ZHBEVX, ZHEEV, ZHEEVD, ZHEEVR, ZHEEVX, ZHET21, + $ ZHET22, ZHPEV, ZHPEVD, ZHPEVX, ZLACPY, ZLASET, + $ ZHEEVD_2STAGE, ZHEEVR_2STAGE, ZHEEVX_2STAGE, + $ ZHEEV_2STAGE, ZHBEV_2STAGE, ZHBEVD_2STAGE, + $ ZHBEVX_2STAGE, ZHETRD_2STAGE, ZHETRD_SY2SB, + $ ZHETRD_SB2ST, ZLATMR, ZLATMS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MIN, SQRT +* .. +* .. Data statements .. + DATA KTYPE / 1, 2, 5*4, 5*5, 3*8, 3*9 / + DATA KMAGN / 2*1, 1, 1, 1, 2, 3, 1, 1, 1, 2, 3, 1, + $ 2, 3, 1, 2, 3 / + DATA KMODE / 2*0, 4, 3, 1, 4, 4, 4, 3, 1, 4, 4, 0, + $ 0, 0, 4, 4, 4 / +* .. +* .. Executable Statements .. +* +* 1) Check for errors +* + NTESTT = 0 + INFO = 0 +* + BADNN = .FALSE. + NMAX = 1 + DO 10 J = 1, NSIZES + NMAX = MAX( NMAX, NN( J ) ) + IF( NN( J ).LT.0 ) + $ BADNN = .TRUE. + 10 CONTINUE +* +* Check for errors +* + IF( NSIZES.LT.0 ) THEN + INFO = -1 + ELSE IF( BADNN ) THEN + INFO = -2 + ELSE IF( NTYPES.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.NMAX ) THEN + INFO = -9 + ELSE IF( LDU.LT.NMAX ) THEN + INFO = -16 + ELSE IF( 2*MAX( 2, NMAX )**2.GT.LWORK ) THEN + INFO = -22 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZDRVST2STG', -INFO ) + RETURN + END IF +* +* Quick return if nothing to do +* + IF( NSIZES.EQ.0 .OR. NTYPES.EQ.0 ) + $ RETURN +* +* More Important constants +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = DLAMCH( 'Overflow' ) + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + ULPINV = ONE / ULP + RTUNFL = SQRT( UNFL ) + RTOVFL = SQRT( OVFL ) +* +* Loop over sizes, types +* + DO 20 I = 1, 4 + ISEED2( I ) = ISEED( I ) + ISEED3( I ) = ISEED( I ) + 20 CONTINUE +* + NERRS = 0 + NMATS = 0 +* + DO 1220 JSIZE = 1, NSIZES + N = NN( JSIZE ) + IF( N.GT.0 ) THEN + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWEDC = MAX( 2*N+N*N, 2*N*N ) + LRWEDC = 1 + 4*N + 2*N*LGN + 3*N**2 + LIWEDC = 3 + 5*N + ELSE + LWEDC = 2 + LRWEDC = 8 + LIWEDC = 8 + END IF + ANINV = ONE / DBLE( MAX( 1, N ) ) +* + IF( NSIZES.NE.1 ) THEN + MTYPES = MIN( MAXTYP, NTYPES ) + ELSE + MTYPES = MIN( MAXTYP+1, NTYPES ) + END IF +* + DO 1210 JTYPE = 1, MTYPES + IF( .NOT.DOTYPE( JTYPE ) ) + $ GO TO 1210 + NMATS = NMATS + 1 + NTEST = 0 +* + DO 30 J = 1, 4 + IOLDSD( J ) = ISEED( J ) + 30 CONTINUE +* +* 2) Compute "A" +* +* Control parameters: +* +* KMAGN KMODE KTYPE +* =1 O(1) clustered 1 zero +* =2 large clustered 2 identity +* =3 small exponential (none) +* =4 arithmetic diagonal, (w/ eigenvalues) +* =5 random log Hermitian, w/ eigenvalues +* =6 random (none) +* =7 random diagonal +* =8 random Hermitian +* =9 band Hermitian, w/ eigenvalues +* + IF( MTYPES.GT.MAXTYP ) + $ GO TO 110 +* + ITYPE = KTYPE( JTYPE ) + IMODE = KMODE( JTYPE ) +* +* Compute norm +* + GO TO ( 40, 50, 60 )KMAGN( JTYPE ) +* + 40 CONTINUE + ANORM = ONE + GO TO 70 +* + 50 CONTINUE + ANORM = ( RTOVFL*ULP )*ANINV + GO TO 70 +* + 60 CONTINUE + ANORM = RTUNFL*N*ULPINV + GO TO 70 +* + 70 CONTINUE +* + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + IINFO = 0 + COND = ULPINV +* +* Special Matrices -- Identity & Jordan block +* +* Zero +* + IF( ITYPE.EQ.1 ) THEN + IINFO = 0 +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Identity +* + DO 80 JCOL = 1, N + A( JCOL, JCOL ) = ANORM + 80 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Diagonal Matrix, [Eigen]values Specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, 0, 0, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Hermitian, eigenvalues specified +* + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, N, N, 'N', A, LDA, WORK, IINFO ) +* + ELSE IF( ITYPE.EQ.7 ) THEN +* +* Diagonal, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, 0, 0, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.8 ) THEN +* +* Hermitian, random eigenvalues +* + CALL ZLATMR( N, N, 'S', ISEED, 'H', WORK, 6, ONE, CONE, + $ 'T', 'N', WORK( N+1 ), 1, ONE, + $ WORK( 2*N+1 ), 1, ONE, 'N', IDUMMA, N, N, + $ ZERO, ANORM, 'NO', A, LDA, IWORK, IINFO ) +* + ELSE IF( ITYPE.EQ.9 ) THEN +* +* Hermitian banded, eigenvalues specified +* + IHBW = INT( ( N-1 )*DLARND( 1, ISEED3 ) ) + CALL ZLATMS( N, N, 'S', ISEED, 'H', RWORK, IMODE, COND, + $ ANORM, IHBW, IHBW, 'Z', U, LDU, WORK, + $ IINFO ) +* +* Store as dense matrix for most routines. +* + CALL ZLASET( 'Full', LDA, N, CZERO, CZERO, A, LDA ) + DO 100 IDIAG = -IHBW, IHBW + IROW = IHBW - IDIAG + 1 + J1 = MAX( 1, IDIAG+1 ) + J2 = MIN( N, N+IDIAG ) + DO 90 J = J1, J2 + I = J - IDIAG + A( I, J ) = U( IROW, J ) + 90 CONTINUE + 100 CONTINUE + ELSE + IINFO = 1 + END IF +* + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'Generator', IINFO, N, JTYPE, + $ IOLDSD + INFO = ABS( IINFO ) + RETURN + END IF +* + 110 CONTINUE +* + ABSTOL = UNFL + UNFL + IF( N.LE.1 ) THEN + IL = 1 + IU = N + ELSE + IL = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IU = 1 + INT( ( N-1 )*DLARND( 1, ISEED2 ) ) + IF( IL.GT.IU ) THEN + ITEMP = IL + IL = IU + IU = ITEMP + END IF + END IF +* +* Perform tests storing upper or lower triangular +* part of matrix. +* + DO 1200 IUPLO = 0, 1 + IF( IUPLO.EQ.0 ) THEN + UPLO = 'L' + ELSE + UPLO = 'U' + END IF +* +* Call ZHEEVD and CHEEVX. +* + CALL ZLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + CALL ZHEEVD( 'V', UPLO, N, A, LDU, D1, WORK, LWEDC, + $ RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 130 + END IF + END IF +* +* Do tests 1 and 2. +* + CALL ZHET21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + CALL ZHEEVD_2STAGE( 'N', UPLO, N, A, LDU, D3, WORK, + $ LWORK, RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 130 + END IF + END IF +* +* Do test 3. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 120 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 120 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 130 CONTINUE + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + CALL ZHEEVX( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do tests 4 and 5. +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL ZHEEVX_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 150 + END IF + END IF +* +* Do test 6. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 140 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 140 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 150 CONTINUE + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + CALL ZHEEVX( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 160 + END IF + END IF +* +* Do tests 7 and 8. +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + CALL ZHEEVX_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 160 + END IF + END IF +* +* Do test 9. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 160 CONTINUE + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 1 +* + CALL ZHEEVX( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, WORK, LWORK, RWORK, + $ IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF + END IF +* +* Do tests 10 and 11. +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + CALL ZHEEVX_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ WORK, LWORK, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 170 + END IF +* +* Do test 12. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 170 CONTINUE +* +* Call ZHPEVD and CHPEVX. +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 190 J = 1, N + DO 180 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 180 CONTINUE + 190 CONTINUE + ELSE + INDX = 1 + DO 210 J = 1, N + DO 200 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 200 CONTINUE + 210 CONTINUE + END IF +* + NTEST = NTEST + 1 + INDWRK = N*( N+1 ) / 2 + 1 + CALL ZHPEVD( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDWRK ), LWEDC, RWORK, LRWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVD(V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do tests 13 and 14. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 230 J = 1, N + DO 220 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + INDX = 1 + DO 250 J = 1, N + DO 240 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 240 CONTINUE + 250 CONTINUE + END IF +* + NTEST = NTEST + 2 + INDWRK = N*( N+1 ) / 2 + 1 + CALL ZHPEVD( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDWRK ), LWEDC, RWORK, LRWEDC, IWORK, + $ LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVD(N,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 270 + END IF + END IF +* +* Do test 15. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 260 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 260 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array WORK with the upper or lower triangular part +* of the matrix in packed form. +* + 270 CONTINUE + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 290 J = 1, N + DO 280 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 280 CONTINUE + 290 CONTINUE + ELSE + INDX = 1 + DO 310 J = 1, N + DO 300 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 300 CONTINUE + 310 CONTINUE + END IF +* + NTEST = NTEST + 1 +* + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( D1( 1 ) ), ABS( D1( N ) ) ) + IF( IL.NE.1 ) THEN + VL = D1( IL ) - MAX( HALF*( D1( IL )-D1( IL-1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VL = D1( 1 ) - MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + IF( IU.NE.N ) THEN + VU = D1( IU ) + MAX( HALF*( D1( IU+1 )-D1( IU ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + ELSE IF( N.GT.0 ) THEN + VU = D1( N ) + MAX( HALF*( D1( N )-D1( 1 ) ), + $ TEN*ULP*TEMP3, TEN*RTUNFL ) + END IF + ELSE + TEMP3 = ZERO + VL = ZERO + VU = ONE + END IF +* + CALL ZHPEVX( 'V', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 370 + END IF + END IF +* +* Do tests 16 and 17. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 330 J = 1, N + DO 320 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 320 CONTINUE + 330 CONTINUE + ELSE + INDX = 1 + DO 350 J = 1, N + DO 340 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 340 CONTINUE + 350 CONTINUE + END IF +* + CALL ZHPEVX( 'N', 'A', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 370 + END IF + END IF +* +* Do test 18. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 360 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 360 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 370 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 390 J = 1, N + DO 380 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 380 CONTINUE + 390 CONTINUE + ELSE + INDX = 1 + DO 410 J = 1, N + DO 400 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 400 CONTINUE + 410 CONTINUE + END IF +* + CALL ZHPEVX( 'V', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 460 + END IF + END IF +* +* Do tests 19 and 20. +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 430 J = 1, N + DO 420 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 420 CONTINUE + 430 CONTINUE + ELSE + INDX = 1 + DO 450 J = 1, N + DO 440 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 440 CONTINUE + 450 CONTINUE + END IF +* + CALL ZHPEVX( 'N', 'I', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 460 + END IF + END IF +* +* Do test 21. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 460 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 480 J = 1, N + DO 470 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 470 CONTINUE + 480 CONTINUE + ELSE + INDX = 1 + DO 500 J = 1, N + DO 490 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 490 CONTINUE + 500 CONTINUE + END IF +* + CALL ZHPEVX( 'V', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 550 + END IF + END IF +* +* Do tests 22 and 23. +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 520 J = 1, N + DO 510 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 510 CONTINUE + 520 CONTINUE + ELSE + INDX = 1 + DO 540 J = 1, N + DO 530 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 530 CONTINUE + 540 CONTINUE + END IF +* + CALL ZHPEVX( 'N', 'V', UPLO, N, WORK, VL, VU, IL, IU, + $ ABSTOL, M3, WA3, Z, LDU, V, RWORK, IWORK, + $ IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEVX(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 550 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 550 + END IF +* +* Do test 24. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 550 CONTINUE +* +* Call ZHBEVD and CHBEVX. +* + IF( JTYPE.LE.7 ) THEN + KD = 0 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 570 J = 1, N + DO 560 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 560 CONTINUE + 570 CONTINUE + ELSE + DO 590 J = 1, N + DO 580 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 580 CONTINUE + 590 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL ZHBEVD( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ LWEDC, RWORK, LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZHBEVD(V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 650 + END IF + END IF +* +* Do tests 25 and 26. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 610 J = 1, N + DO 600 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 600 CONTINUE + 610 CONTINUE + ELSE + DO 630 J = 1, N + DO 620 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 620 CONTINUE + 630 CONTINUE + END IF +* + NTEST = NTEST + 2 + CALL ZHBEVD_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, + $ Z, LDU, WORK, LWORK, RWORK, + $ LRWEDC, IWORK, LIWEDC, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'ZHBEVD_2STAGE(N,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 650 + END IF + END IF +* +* Do test 27. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 640 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 640 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 650 CONTINUE + IF( IUPLO.EQ.1 ) THEN + DO 670 J = 1, N + DO 660 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 660 CONTINUE + 670 CONTINUE + ELSE + DO 690 J = 1, N + DO 680 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 680 CONTINUE + 690 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL ZHBEVX( 'V', 'A', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M, WA1, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHBEVX(V,A,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 750 + END IF + END IF +* +* Do tests 28 and 29. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 710 J = 1, N + DO 700 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 700 CONTINUE + 710 CONTINUE + ELSE + DO 730 J = 1, N + DO 720 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 720 CONTINUE + 730 CONTINUE + END IF +* + CALL ZHBEVX_2STAGE( 'N', 'A', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M2, WA2, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'ZHBEVX_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 750 + END IF + END IF +* +* Do test 30. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 740 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 740 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 750 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 770 J = 1, N + DO 760 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 760 CONTINUE + 770 CONTINUE + ELSE + DO 790 J = 1, N + DO 780 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 780 CONTINUE + 790 CONTINUE + END IF +* + CALL ZHBEVX( 'V', 'I', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZHBEVX(V,I,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 840 + END IF + END IF +* +* Do tests 31 and 32. +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 810 J = 1, N + DO 800 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 800 CONTINUE + 810 CONTINUE + ELSE + DO 830 J = 1, N + DO 820 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 820 CONTINUE + 830 CONTINUE + END IF + CALL ZHBEVX_2STAGE( 'N', 'I', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'ZHBEVX_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 840 + END IF + END IF +* +* Do test 33. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 840 CONTINUE + NTEST = NTEST + 1 + IF( IUPLO.EQ.1 ) THEN + DO 860 J = 1, N + DO 850 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 850 CONTINUE + 860 CONTINUE + ELSE + DO 880 J = 1, N + DO 870 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 870 CONTINUE + 880 CONTINUE + END IF + CALL ZHBEVX( 'V', 'V', UPLO, N, KD, V, LDU, U, LDU, VL, + $ VU, IL, IU, ABSTOL, M2, WA2, Z, LDU, WORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZHBEVX(V,V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 930 + END IF + END IF +* +* Do tests 34 and 35. +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 +* + IF( IUPLO.EQ.1 ) THEN + DO 900 J = 1, N + DO 890 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 890 CONTINUE + 900 CONTINUE + ELSE + DO 920 J = 1, N + DO 910 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 910 CONTINUE + 920 CONTINUE + END IF + CALL ZHBEVX_2STAGE( 'N', 'V', UPLO, N, KD, V, LDU, + $ U, LDU, VL, VU, IL, IU, ABSTOL, + $ M3, WA3, Z, LDU, WORK, LWORK, + $ RWORK, IWORK, IWORK( 5*N+1 ), IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'ZHBEVX_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 930 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 930 + END IF +* +* Do test 36. +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + 930 CONTINUE +* +* Call ZHEEV +* + CALL ZLACPY( ' ', N, N, A, LDA, V, LDU ) +* + NTEST = NTEST + 1 + CALL ZHEEV( 'V', UPLO, N, A, LDU, D1, WORK, LWORK, RWORK, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 950 + END IF + END IF +* +* Do tests 37 and 38 +* + CALL ZHET21( 1, UPLO, N, 0, V, LDU, D1, D2, A, LDU, Z, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + NTEST = NTEST + 2 + CALL ZHEEV_2STAGE( 'N', UPLO, N, A, LDU, D3, + $ WORK, LWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 950 + END IF + END IF +* +* Do test 39 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 940 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 940 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 950 CONTINUE +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* Call ZHPEV +* +* Load array WORK with the upper or lower triangular +* part of the matrix in packed form. +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 970 J = 1, N + DO 960 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 960 CONTINUE + 970 CONTINUE + ELSE + INDX = 1 + DO 990 J = 1, N + DO 980 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 980 CONTINUE + 990 CONTINUE + END IF +* + NTEST = NTEST + 1 + INDWRK = N*( N+1 ) / 2 + 1 + CALL ZHPEV( 'V', UPLO, N, WORK, D1, Z, LDU, + $ WORK( INDWRK ), RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEV(V,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1050 + END IF + END IF +* +* Do tests 40 and 41. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + INDX = 1 + DO 1010 J = 1, N + DO 1000 I = 1, J + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1000 CONTINUE + 1010 CONTINUE + ELSE + INDX = 1 + DO 1030 J = 1, N + DO 1020 I = J, N + WORK( INDX ) = A( I, J ) + INDX = INDX + 1 + 1020 CONTINUE + 1030 CONTINUE + END IF +* + NTEST = NTEST + 2 + INDWRK = N*( N+1 ) / 2 + 1 + CALL ZHPEV( 'N', UPLO, N, WORK, D3, Z, LDU, + $ WORK( INDWRK ), RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHPEV(N,' // UPLO // ')', + $ IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1050 + END IF + END IF +* +* Do test 42 +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1040 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1040 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1050 CONTINUE +* +* Call ZHBEV +* + IF( JTYPE.LE.7 ) THEN + KD = 0 + ELSE IF( JTYPE.GE.8 .AND. JTYPE.LE.15 ) THEN + KD = MAX( N-1, 0 ) + ELSE + KD = IHBW + END IF +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + IF( IUPLO.EQ.1 ) THEN + DO 1070 J = 1, N + DO 1060 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1060 CONTINUE + 1070 CONTINUE + ELSE + DO 1090 J = 1, N + DO 1080 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1080 CONTINUE + 1090 CONTINUE + END IF +* + NTEST = NTEST + 1 + CALL ZHBEV( 'V', UPLO, N, KD, V, LDU, D1, Z, LDU, WORK, + $ RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 )'ZHBEV(V,' // UPLO // ')', + $ IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1140 + END IF + END IF +* +* Do tests 43 and 44. +* + CALL ZHET21( 1, UPLO, N, 0, A, LDA, D1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + IF( IUPLO.EQ.1 ) THEN + DO 1110 J = 1, N + DO 1100 I = MAX( 1, J-KD ), J + V( KD+1+I-J, J ) = A( I, J ) + 1100 CONTINUE + 1110 CONTINUE + ELSE + DO 1130 J = 1, N + DO 1120 I = J, MIN( N, J+KD ) + V( 1+I-J, J ) = A( I, J ) + 1120 CONTINUE + 1130 CONTINUE + END IF +* + NTEST = NTEST + 2 + CALL ZHBEV_2STAGE( 'N', UPLO, N, KD, V, LDU, D3, Z, LDU, + $ WORK, LWORK, RWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9998 ) + $ 'ZHBEV_2STAGE(N,' // UPLO // ')', + $ IINFO, N, KD, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1140 + END IF + END IF +* + 1140 CONTINUE +* +* Do test 45. +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1150 J = 1, N + TEMP1 = MAX( TEMP1, ABS( D1( J ) ), ABS( D3( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( D1( J )-D3( J ) ) ) + 1150 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + CALL ZLACPY( ' ', N, N, A, LDA, V, LDU ) + NTEST = NTEST + 1 + CALL ZHEEVR( 'V', 'A', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M, WA1, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVR(V,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1170 + END IF + END IF +* +* Do tests 45 and 46 (or ... ) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET21( 1, UPLO, N, 0, A, LDU, WA1, D2, Z, LDU, V, + $ LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL ZHEEVR_2STAGE( 'N', 'A', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M2, WA2, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVR_2STAGE(N,A,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1170 + END IF + END IF +* +* Do test 47 (or ... ) +* + TEMP1 = ZERO + TEMP2 = ZERO + DO 1160 J = 1, N + TEMP1 = MAX( TEMP1, ABS( WA1( J ) ), ABS( WA2( J ) ) ) + TEMP2 = MAX( TEMP2, ABS( WA1( J )-WA2( J ) ) ) + 1160 CONTINUE + RESULT( NTEST ) = TEMP2 / MAX( UNFL, + $ ULP*MAX( TEMP1, TEMP2 ) ) +* + 1170 CONTINUE +* + NTEST = NTEST + 1 + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL ZHEEVR( 'V', 'I', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVR(V,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do tests 48 and 49 (or +??) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL ZHEEVR_2STAGE( 'N', 'I', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVR_2STAGE(N,I,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1180 + END IF + END IF +* +* Do test 50 (or +??) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, ULP*TEMP3 ) + 1180 CONTINUE +* + NTEST = NTEST + 1 + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL ZHEEVR( 'V', 'V', UPLO, N, A, LDU, VL, VU, IL, IU, + $ ABSTOL, M2, WA2, Z, LDU, IWORK, WORK, LWORK, + $ RWORK, LRWORK, IWORK( 2*N+1 ), LIWORK-2*N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 )'ZHEEVR(V,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + RESULT( NTEST+1 ) = ULPINV + RESULT( NTEST+2 ) = ULPINV + GO TO 1190 + END IF + END IF +* +* Do tests 51 and 52 (or +??) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* + CALL ZHET22( 1, UPLO, N, M2, 0, A, LDU, WA2, D2, Z, LDU, + $ V, LDU, TAU, WORK, RWORK, RESULT( NTEST ) ) +* + NTEST = NTEST + 2 + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) + CALL ZHEEVR_2STAGE( 'N', 'V', UPLO, N, A, LDU, VL, VU, + $ IL, IU, ABSTOL, M3, WA3, Z, LDU, + $ IWORK, WORK, LWORK, RWORK, LRWORK, + $ IWORK( 2*N+1 ), LIWORK-2*N, IINFO ) + IF( IINFO.NE.0 ) THEN + WRITE( NOUNIT, FMT = 9999 ) + $ 'ZHEEVR_2STAGE(N,V,' // UPLO // + $ ')', IINFO, N, JTYPE, IOLDSD + INFO = ABS( IINFO ) + IF( IINFO.LT.0 ) THEN + RETURN + ELSE + RESULT( NTEST ) = ULPINV + GO TO 1190 + END IF + END IF +* + IF( M3.EQ.0 .AND. N.GT.0 ) THEN + RESULT( NTEST ) = ULPINV + GO TO 1190 + END IF +* +* Do test 52 (or +??) +* + TEMP1 = DSXT1( 1, WA2, M2, WA3, M3, ABSTOL, ULP, UNFL ) + TEMP2 = DSXT1( 1, WA3, M3, WA2, M2, ABSTOL, ULP, UNFL ) + IF( N.GT.0 ) THEN + TEMP3 = MAX( ABS( WA1( 1 ) ), ABS( WA1( N ) ) ) + ELSE + TEMP3 = ZERO + END IF + RESULT( NTEST ) = ( TEMP1+TEMP2 ) / + $ MAX( UNFL, TEMP3*ULP ) +* + CALL ZLACPY( ' ', N, N, V, LDU, A, LDA ) +* +* +* +* +* Load array V with the upper or lower triangular part +* of the matrix in band form. +* + 1190 CONTINUE +* + 1200 CONTINUE +* +* End of Loop -- Check for RESULT(j) > THRESH +* + NTESTT = NTESTT + NTEST + CALL DLAFTS( 'ZST', N, N, JTYPE, NTEST, RESULT, IOLDSD, + $ THRESH, NOUNIT, NERRS ) +* + 1210 CONTINUE + 1220 CONTINUE +* +* Summary +* + CALL ALASVM( 'ZST', NOUNIT, NERRS, NTESTT, 0 ) +* + 9999 FORMAT( ' ZDRVST2STG: ', A, ' returned INFO=', I6, / 9X, 'N=', I6, + $ ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, ')' ) + 9998 FORMAT( ' ZDRVST2STG: ', A, ' returned INFO=', I6, / 9X, 'N=', I6, + $ ', KD=', I6, ', JTYPE=', I6, ', ISEED=(', 3( I5, ',' ), I5, + $ ')' ) +* + RETURN +* +* End of ZDRVST2STG +* + END diff --git a/TESTING/EIG/zerrst.f b/TESTING/EIG/zerrst.f index 92c9e52c..8afa1dce 100644 --- a/TESTING/EIG/zerrst.f +++ b/TESTING/EIG/zerrst.f @@ -1,5 +1,7 @@ *> \brief \b ZERRST * +* @precisions fortran z -> c +* * =========== DOCUMENTATION =========== * * Online html documentation available at @@ -25,6 +27,10 @@ *> ZUNGTR, ZUPMTR, ZSTEQR, CSTEIN, ZPTEQR, ZHBTRD, *> ZHEEV, CHEEVX, CHEEVD, ZHBEV, CHBEVX, CHBEVD, *> ZHPEV, CHPEVX, CHPEVD, and ZSTEDC. +*> ZHEEVD_2STAGE, ZHEEVR_2STAGE, ZHEEVX_2STAGE, +*> ZHEEV_2STAGE, ZHBEV_2STAGE, ZHBEVD_2STAGE, +*> ZHBEVX_2STAGE, ZHETRD_2STAGE, ZHETRD_SY2SB, +*> ZHETRD_SB2ST *> \endverbatim * * Arguments: @@ -93,7 +99,11 @@ EXTERNAL CHKXER, ZHBEV, ZHBEVD, ZHBEVX, ZHBTRD, ZHEEV, $ ZHEEVD, ZHEEVR, ZHEEVX, ZHETRD, ZHPEV, ZHPEVD, $ ZHPEVX, ZHPTRD, ZPTEQR, ZSTEDC, ZSTEIN, ZSTEQR, - $ ZUNGTR, ZUNMTR, ZUPGTR, ZUPMTR + $ ZUNGTR, ZUNMTR, ZUPGTR, ZUPMTR, + $ ZHEEVD_2STAGE, ZHEEVR_2STAGE, ZHEEVX_2STAGE, + $ ZHEEV_2STAGE, ZHBEV_2STAGE, ZHBEVD_2STAGE, + $ ZHBEVX_2STAGE, ZHETRD_2STAGE, ZHETRD_SY2SB, + $ ZHETRD_SB2ST * .. * .. Scalars in Common .. LOGICAL LERR, OK @@ -151,6 +161,103 @@ CALL CHKXER( 'ZHETRD', INFOT, NOUT, LERR, OK ) NT = NT + 4 * +* ZHETRD_2STAGE +* + SRNAMT = 'ZHETRD_2STAGE' + INFOT = 1 + CALL ZHETRD_2STAGE( '/', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHETRD_2STAGE( 'H', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_2STAGE( 'N', '/', 0, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHETRD_2STAGE( 'N', 'U', -1, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHETRD_2STAGE( 'N', 'U', 2, A, 1, D, E, TAU, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHETRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL ZHETRD_2STAGE( 'N', 'U', 0, A, 1, D, E, TAU, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'ZHETRD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 7 +* +* ZHETRD_HE2HB +* + SRNAMT = 'ZHETRD_HE2HB' + INFOT = 1 + CALL ZHETRD_HE2HB( '/', 0, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_HE2HB( 'U', -1, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHETRD_HE2HB( 'U', 0, -1, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHETRD_HE2HB( 'U', 2, 0, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHETRD_HE2HB( 'U', 0, 2, A, 1, C, 1, TAU, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHETRD_HE2HB( 'U', 0, 0, A, 1, C, 1, TAU, W, 0, INFO ) + CALL CHKXER( 'ZHETRD_HE2HB', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* +* ZHETRD_HB2ST +* + SRNAMT = 'ZHETRD_HB2ST' + INFOT = 1 + CALL ZHETRD_HB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_HB2ST( 'Y', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_HB2ST( 'Y', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHETRD_HB2ST( 'Y', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHETRD_HB2ST( 'Y', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHETRD_HB2ST( 'Y', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHETRD_HB2ST( 'Y', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHETRD_HB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZHETRD_HB2ST( 'Y', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * ZUNGTR * SRNAMT = 'ZUNGTR' @@ -377,6 +484,63 @@ CALL CHKXER( 'ZHEEVD', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* ZHEEVD_2STAGE +* + SRNAMT = 'ZHEEVD_2STAGE' + INFOT = 1 + CALL ZHEEVD_2STAGE( '/', 'U', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHEEVD_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHEEVD_2STAGE( 'N', '/', 0, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHEEVD_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHEEVD_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, + $ RW, 2, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 0, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZHEEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 2, + $ RW, 2, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 8 +* CALL ZHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 3, +* $ RW, 25, IW, 12, INFO ) +* CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, + $ RW, 0, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHEEVD_2STAGE( 'N', 'U', 2, A, 2, X, W, 25, + $ RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 10 +* CALL ZHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 8, +* $ RW, 18, IW, 12, INFO ) +* CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL ZHEEVD_2STAGE( 'N', 'U', 1, A, 1, X, W, 1, + $ RW, 1, IW, 0, INFO ) + CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 +* CALL ZHEEVD_2STAGE( 'V', 'U', 2, A, 2, X, W, 8, +* $ RW, 25, IW, 11, INFO ) +* CALL CHKXER( 'ZHEEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 10 +* * ZHEEV * SRNAMT = 'ZHEEV ' @@ -397,6 +561,29 @@ CALL CHKXER( 'ZHEEV ', INFOT, NOUT, LERR, OK ) NT = NT + 5 * +* ZHEEV_2STAGE +* + SRNAMT = 'ZHEEV_2STAGE ' + INFOT = 1 + CALL ZHEEV_2STAGE( '/', 'U', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHEEV_2STAGE( 'V', 'U', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHEEV_2STAGE( 'N', '/', 0, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHEEV_2STAGE( 'N', 'U', -1, A, 1, X, W, 1, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHEEV_2STAGE( 'N', 'U', 2, A, 1, X, W, 3, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZHEEV_2STAGE( 'N', 'U', 2, A, 2, X, W, 2, RW, INFO ) + CALL CHKXER( 'ZHEEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 6 +* * ZHEEVX * SRNAMT = 'ZHEEVX' @@ -441,6 +628,65 @@ CALL CHKXER( 'ZHEEVX', INFOT, NOUT, LERR, OK ) NT = NT + 10 * +* ZHEEVX_2STAGE +* + SRNAMT = 'ZHEEVX_2STAGE' + INFOT = 1 + CALL ZHEEVX_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHEEVX_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHEEVX_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 1.0D0, 1, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHEEVX_2STAGE( 'N', 'A', '/', 0, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + INFOT = 4 + CALL ZHEEVX_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZHEEVX_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL ZHEEVX_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 1, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHEEVX_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, X, Z, 2, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL ZHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 0, W, 3, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 17 + CALL ZHEEVX_2STAGE( 'N', 'A', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 0, RW, IW, I1, INFO ) + CALL CHKXER( 'ZHEEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 11 +* * ZHEEVR * SRNAMT = 'ZHEEVR' @@ -508,6 +754,90 @@ CALL CHKXER( 'ZHEEVR', INFOT, NOUT, LERR, OK ) NT = NT + 12 * +* ZHEEVR_2STAGE +* + SRNAMT = 'ZHEEVR_2STAGE' + N = 1 + INFOT = 1 + CALL ZHEEVR_2STAGE( '/', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHEEVR_2STAGE( 'V', 'A', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHEEVR_2STAGE( 'N', '/', 'U', 0, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHEEVR_2STAGE( 'N', 'A', '/', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, + $ IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHEEVR_2STAGE( 'N', 'A', 'U', -1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, + $ IW( 2*N+1 ), 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZHEEVR_2STAGE( 'N', 'A', 'U', 2, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 8 + CALL ZHEEVR_2STAGE( 'N', 'V', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 0, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 10 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 2, A, 2, + $ 0.0D0, 0.0D0, 2, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 0, IW, Q, 2*N, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 2*N-1, RW, 24*N, IW( 2*N+1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, RW, 24*N-1, IW( 2*N-1 ), + $ 10*N, INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 22 + CALL ZHEEVR_2STAGE( 'N', 'I', 'U', 1, A, 1, + $ 0.0D0, 0.0D0, 1, 1, 0.0D0, + $ M, R, Z, 1, IW, Q, 26*N, RW, 24*N, IW, 10*N-1, + $ INFO ) + CALL CHKXER( 'ZHEEVR_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * ZHPEVD * SRNAMT = 'ZHPEVD' @@ -646,6 +976,47 @@ CALL CHKXER( 'ZHBTRD', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* ZHETRD_HB2ST +* + SRNAMT = 'ZHETRD_HB2ST' + INFOT = 1 + CALL ZHETRD_HB2ST( '/', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_HB2ST( 'N', '/', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHETRD_HB2ST( 'N', 'H', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHETRD_HB2ST( 'N', 'N', '/', 0, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHETRD_HB2ST( 'N', 'N', 'U', -1, 0, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHETRD_HB2ST( 'N', 'N', 'U', 0, -1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHETRD_HB2ST( 'N', 'N', 'U', 0, 1, A, 1, D, E, + $ C, 1, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHETRD_HB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 0, W, 1, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZHETRD_HB2ST( 'N', 'N', 'U', 0, 0, A, 1, D, E, + $ C, 1, W, 0, INFO ) + CALL CHKXER( 'ZHETRD_HB2ST', INFOT, NOUT, LERR, OK ) + NT = NT + 9 +* * ZHBEVD * SRNAMT = 'ZHBEVD' @@ -711,6 +1082,75 @@ CALL CHKXER( 'ZHBEVD', INFOT, NOUT, LERR, OK ) NT = NT + 15 * +* ZHBEVD_2STAGE +* + SRNAMT = 'ZHBEVD_2STAGE' + INFOT = 1 + CALL ZHBEVD_2STAGE( '/', 'U', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHBEVD_2STAGE( 'V', 'U', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHBEVD_2STAGE( 'N', '/', 0, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHBEVD_2STAGE( 'N', 'U', -1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHBEVD_2STAGE( 'N', 'U', 0, -1, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZHBEVD_2STAGE( 'N', 'U', 2, 1, A, 1, X, Z, 1, + $ W, 2, RW, 2, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL ZHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 0, + $ W, 8, RW, 25, IW, 12, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 0, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 1, RW, 2, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 11 +* CALL ZHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 2, RW, 25, IW, 12, INFO ) +* CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 0, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 25, RW, 1, IW, 1, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 13 +* CALL ZHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 25, RW, 2, IW, 12, INFO ) +* CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL ZHBEVD_2STAGE( 'N', 'U', 1, 0, A, 1, X, Z, 1, + $ W, 1, RW, 1, IW, 0, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 15 + CALL ZHBEVD_2STAGE( 'N', 'U', 2, 1, A, 2, X, Z, 2, + $ W, 25, RW, 2, IW, 0, INFO ) + CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 15 +* CALL ZHBEVD_2STAGE( 'V', 'U', 2, 1, A, 2, X, Z, 2, +* $ W, 25, RW, 25, IW, 2, INFO ) +* CALL CHKXER( 'ZHBEVD_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 13 +* * ZHBEV * SRNAMT = 'ZHBEV ' @@ -734,6 +1174,43 @@ CALL CHKXER( 'ZHBEV ', INFOT, NOUT, LERR, OK ) NT = NT + 6 * +* ZHBEV_2STAGE +* + SRNAMT = 'ZHBEV_2STAGE ' + INFOT = 1 + CALL ZHBEV_2STAGE( '/', 'U', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 1 + CALL ZHBEV_2STAGE( 'V', 'U', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHBEV_2STAGE( 'N', '/', 0, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHBEV_2STAGE( 'N', 'U', -1, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 4 + CALL ZHBEV_2STAGE( 'N', 'U', 0, -1, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 6 + CALL ZHBEV_2STAGE( 'N', 'U', 2, 1, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 9 + CALL ZHBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, + $ Z, 0, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHBEV_2STAGE( 'N', 'U', 2, 0, A, 1, X, + $ Z, 1, W, 0, RW, INFO ) + CALL CHKXER( 'ZHBEV_2STAGE ', INFOT, NOUT, LERR, OK ) + NT = NT + 8 +* * ZHBEVX * SRNAMT = 'ZHBEVX' @@ -781,6 +1258,74 @@ $ 0, 0.0D0, M, X, Z, 1, W, RW, IW, I3, INFO ) CALL CHKXER( 'ZHBEVX', INFOT, NOUT, LERR, OK ) NT = NT + 11 +* +* ZHBEVX_2STAGE +* + SRNAMT = 'ZHBEVX_2STAGE' + INFOT = 1 + CALL ZHBEVX_2STAGE( '/', 'A', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + INFOT = 1 + CALL ZHBEVX_2STAGE( 'V', 'A', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 2 + CALL ZHBEVX_2STAGE( 'N', '/', 'U', 0, 0, A, 1, Q, 1, + $ 0.0D0, 1.0D0, 1, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 3 + CALL ZHBEVX_2STAGE( 'N', 'A', '/', 0, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + INFOT = 4 + CALL ZHBEVX_2STAGE( 'N', 'A', 'U', -1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 5 + CALL ZHBEVX_2STAGE( 'N', 'A', 'U', 0, -1, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 7 + CALL ZHBEVX_2STAGE( 'N', 'A', 'U', 2, 1, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 2, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) +* INFOT = 9 +* CALL ZHBEVX_2STAGE( 'V', 'A', 'U', 2, 0, A, 1, Q, 1, +* $ 0.0D0, 0.0D0, 0, 0, 0.0D0, +* $ M, X, Z, 2, W, 0, RW, IW, I3, INFO ) +* CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 11 + CALL ZHBEVX_2STAGE( 'N', 'V', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 12 + CALL ZHBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 13 + CALL ZHBEVX_2STAGE( 'N', 'I', 'U', 1, 0, A, 1, Q, 1, + $ 0.0D0, 0.0D0, 1, 2, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 18 + CALL ZHBEVX_2STAGE( 'N', 'A', 'U', 2, 0, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 0, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + INFOT = 20 + CALL ZHBEVX_2STAGE( 'N', 'A', 'U', 2, 0, A, 1, Q, 2, + $ 0.0D0, 0.0D0, 0, 0, 0.0D0, + $ M, X, Z, 1, W, 0, RW, IW, I3, INFO ) + CALL CHKXER( 'ZHBEVX_2STAGE', INFOT, NOUT, LERR, OK ) + NT = NT + 12 END IF * * Print a summary line. diff --git a/TESTING/Makefile b/TESTING/Makefile index 968a9a2b..9b641e76 100644 --- a/TESTING/Makefile +++ b/TESTING/Makefile @@ -46,6 +46,7 @@ all: single complex double complex16 singleproto doubleproto complexproto co SEIGTST= snep.out \ ssep.out \ + sse2.out \ ssvd.out \ sec.out \ sed.out \ @@ -66,6 +67,7 @@ SEIGTST= snep.out \ CEIGTST= cnep.out \ csep.out \ + cse2.out \ csvd.out \ cec.out \ ced.out \ @@ -86,6 +88,7 @@ CEIGTST= cnep.out \ DEIGTST= dnep.out \ dsep.out \ + dse2.out \ dsvd.out \ dec.out \ ded.out \ @@ -106,6 +109,7 @@ DEIGTST= dnep.out \ ZEIGTST= znep.out \ zsep.out \ + zse2.out \ zsvd.out \ zec.out \ zed.out \ @@ -223,6 +227,10 @@ ssep.out: sep.in xeigtsts @echo SEP: Testing Symmetric Eigenvalue Problem routines ./xeigtsts < sep.in > $@ 2>&1 +sse2.out: se2.in xeigtsts + @echo SEP: Testing Symmetric Eigenvalue Problem routines + ./xeigtsts < se2.in > $@ 2>&1 + ssvd.out: svd.in xeigtsts @echo SVD: Testing Singular Value Decomposition routines ./xeigtsts < svd.in > $@ 2>&1 @@ -301,6 +309,10 @@ csep.out: sep.in xeigtstc @echo SEP: Testing Symmetric Eigenvalue Problem routines ./xeigtstc < sep.in > $@ 2>&1 +cse2.out: se2.in xeigtstc + @echo SEP: Testing Symmetric Eigenvalue Problem routines + ./xeigtstc < se2.in > $@ 2>&1 + csvd.out: svd.in xeigtstc @echo SVD: Testing Singular Value Decomposition routines ./xeigtstc < svd.in > $@ 2>&1 @@ -379,6 +391,10 @@ dsep.out: sep.in xeigtstd @echo SEP: Testing Symmetric Eigenvalue Problem routines ./xeigtstd < sep.in > $@ 2>&1 +dse2.out: se2.in xeigtstd + @echo SEP: Testing Symmetric Eigenvalue Problem routines + ./xeigtstd < se2.in > $@ 2>&1 + dsvd.out: svd.in xeigtstd @echo SVD: Testing Singular Value Decomposition routines ./xeigtstd < svd.in > $@ 2>&1 @@ -457,6 +473,10 @@ zsep.out: sep.in xeigtstz @echo SEP: Testing Symmetric Eigenvalue Problem routines ./xeigtstz < sep.in > $@ 2>&1 +zse2.out: se2.in xeigtstz + @echo SEP: Testing Symmetric Eigenvalue Problem routines + ./xeigtstz < se2.in > $@ 2>&1 + zsvd.out: svd.in xeigtstz @echo SVD: Testing Singular Value Decomposition routines ./xeigtstz < svd.in > $@ 2>&1 diff --git a/TESTING/se2.in b/TESTING/se2.in new file mode 100644 index 00000000..e20649c9 --- /dev/null +++ b/TESTING/se2.in @@ -0,0 +1,15 @@ +SE2: Data file for testing Symmetric Eigenvalue Problem routines +6 Number of values of N +0 1 2 3 5 20 Values of N (dimension) +5 Number of values of NB +1 3 3 3 10 Values of NB (blocksize) +2 2 2 2 2 Values of NBMIN (minimum blocksize) +1 0 5 9 1 Values of NX (crossover point) +50.0 Threshold value +T Put T to test the LAPACK routines +T Put T to test the driver routines +T Put T to test the error exits +1 Code to interpret the seed +SE2 20 +1 2 3 4 5 6 7 8 10 11 12 13 14 15 16 17 18 19 20 21 + |