From 4943ddd03a9baf2d2c73eb5b84a1b0f1a42d3242 Mon Sep 17 00:00:00 2001 From: Joey Ye Date: Thu, 21 Aug 2008 03:01:00 +0000 Subject: * doc/extend.texi: Document AVX built-in function. * doc/invoke.texi: Document -mavx. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ix86/avx@139371 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.avx | 5 ++ gcc/doc/extend.texi | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/doc/invoke.texi | 10 +++- 3 files changed, 148 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog.avx b/gcc/ChangeLog.avx index ff3732717a9..7dab0dbbc08 100644 --- a/gcc/ChangeLog.avx +++ b/gcc/ChangeLog.avx @@ -1,3 +1,8 @@ +2008-08-20 Joey Ye + + * doc/extend.texi: Document AVX built-in function. + * doc/invoke.texi: Document -mavx. + 2008-08-20 Xuepeng Guo * config/i386/sse.md (*avx_aesenc): New. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 1f8cbd3031c..760c1288293 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -8271,6 +8271,141 @@ depending on the size of @code{unsigned long}. Generates the @code{popcntq} machine instruction. @end table +The following built-in functions are available when @option{-mavx} is +used. All of them generate the machine instruction that is part of the +name. 
+ +@smallexample +v4df __builtin_ia32_addpd256 (v4df,v4df) +v8sf __builtin_ia32_addps256 (v8sf,v8sf) +v4df __builtin_ia32_addsubpd256 (v4df,v4df) +v8sf __builtin_ia32_addsubps256 (v8sf,v8sf) +v4df __builtin_ia32_andnpd256 (v4df,v4df) +v8sf __builtin_ia32_andnps256 (v8sf,v8sf) +v4df __builtin_ia32_andpd256 (v4df,v4df) +v8sf __builtin_ia32_andps256 (v8sf,v8sf) +v4df __builtin_ia32_blendpd256 (v4df,v4df,int) +v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int) +v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df) +v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf) +v2df __builtin_ia32_cmppd (v2df,v2df,int) +v4df __builtin_ia32_cmppd256 (v4df,v4df,int) +v4sf __builtin_ia32_cmpps (v4sf,v4sf,int) +v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int) +v2df __builtin_ia32_cmpsd (v2df,v2df,int) +v4sf __builtin_ia32_cmpss (v4sf,v4sf,int) +v4df __builtin_ia32_cvtdq2pd256 (v4si) +v8sf __builtin_ia32_cvtdq2ps256 (v8si) +v4si __builtin_ia32_cvtpd2dq256 (v4df) +v4sf __builtin_ia32_cvtpd2ps256 (v4df) +v8si __builtin_ia32_cvtps2dq256 (v8sf) +v4df __builtin_ia32_cvtps2pd256 (v4sf) +v4si __builtin_ia32_cvttpd2dq256 (v4df) +v8si __builtin_ia32_cvttps2dq256 (v8sf) +v4df __builtin_ia32_divpd256 (v4df,v4df) +v8sf __builtin_ia32_divps256 (v8sf,v8sf) +v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int) +v4df __builtin_ia32_haddpd256 (v4df,v4df) +v8sf __builtin_ia32_haddps256 (v8sf,v8sf) +v4df __builtin_ia32_hsubpd256 (v4df,v4df) +v8sf __builtin_ia32_hsubps256 (v8sf,v8sf) +v32qi __builtin_ia32_lddqu256 (pcchar) +v32qi __builtin_ia32_loaddqu256 (pcchar) +v4df __builtin_ia32_loadupd256 (pcdouble) +v8sf __builtin_ia32_loadups256 (pcfloat) +v2df __builtin_ia32_maskloadpd (pcv2df,v2df) +v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df) +v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf) +v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf) +void __builtin_ia32_maskstorepd (pv2df,v2df,v2df) +void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df) +void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf) +void __builtin_ia32_maskstoreps256 
(pv8sf,v8sf,v8sf) +v4df __builtin_ia32_maxpd256 (v4df,v4df) +v8sf __builtin_ia32_maxps256 (v8sf,v8sf) +v4df __builtin_ia32_minpd256 (v4df,v4df) +v8sf __builtin_ia32_minps256 (v8sf,v8sf) +v4df __builtin_ia32_movddup256 (v4df) +int __builtin_ia32_movmskpd256 (v4df) +int __builtin_ia32_movmskps256 (v8sf) +v8sf __builtin_ia32_movshdup256 (v8sf) +v8sf __builtin_ia32_movsldup256 (v8sf) +v4df __builtin_ia32_mulpd256 (v4df,v4df) +v8sf __builtin_ia32_mulps256 (v8sf,v8sf) +v4df __builtin_ia32_orpd256 (v4df,v4df) +v8sf __builtin_ia32_orps256 (v8sf,v8sf) +v2df __builtin_ia32_pd_pd256 (v4df) +v4df __builtin_ia32_pd256_pd (v2df) +v4sf __builtin_ia32_ps_ps256 (v8sf) +v8sf __builtin_ia32_ps256_ps (v4sf) +int __builtin_ia32_ptestc256 (v4di,v4di,ptest) +int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest) +int __builtin_ia32_ptestz256 (v4di,v4di,ptest) +v8sf __builtin_ia32_rcpps256 (v8sf) +v4df __builtin_ia32_roundpd256 (v4df,int) +v8sf __builtin_ia32_roundps256 (v8sf,int) +v8sf __builtin_ia32_rsqrtps_nr256 (v8sf) +v8sf __builtin_ia32_rsqrtps256 (v8sf) +v4df __builtin_ia32_shufpd256 (v4df,v4df,int) +v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int) +v4si __builtin_ia32_si_si256 (v8si) +v8si __builtin_ia32_si256_si (v4si) +v4df __builtin_ia32_sqrtpd256 (v4df) +v8sf __builtin_ia32_sqrtps_nr256 (v8sf) +v8sf __builtin_ia32_sqrtps256 (v8sf) +void __builtin_ia32_storedqu256 (pchar,v32qi) +void __builtin_ia32_storeupd256 (pdouble,v4df) +void __builtin_ia32_storeups256 (pfloat,v8sf) +v4df __builtin_ia32_subpd256 (v4df,v4df) +v8sf __builtin_ia32_subps256 (v8sf,v8sf) +v4df __builtin_ia32_unpckhpd256 (v4df,v4df) +v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf) +v4df __builtin_ia32_unpcklpd256 (v4df,v4df) +v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf) +v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df) +v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf) +v4df __builtin_ia32_vbroadcastsd256 (pcdouble) +v4sf __builtin_ia32_vbroadcastss (pcfloat) +v8sf __builtin_ia32_vbroadcastss256 (pcfloat) +v2df 
__builtin_ia32_vextractf128_pd256 (v4df,int) +v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int) +v4si __builtin_ia32_vextractf128_si256 (v8si,int) +v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int) +v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int) +v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int) +v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int) +v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int) +v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int) +v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int) +v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int) +v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int) +v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int) +v2df __builtin_ia32_vpermilpd (v2df,int) +v4df __builtin_ia32_vpermilpd256 (v4df,int) +v4sf __builtin_ia32_vpermilps (v4sf,int) +v8sf __builtin_ia32_vpermilps256 (v8sf,int) +v2df __builtin_ia32_vpermilvarpd (v2df,v2di) +v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di) +v4sf __builtin_ia32_vpermilvarps (v4sf,v4si) +v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si) +int __builtin_ia32_vtestcpd (v2df,v2df,ptest) +int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestcps (v4sf,v4sf,ptest) +int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest) +int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest) +int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest) +int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest) +int __builtin_ia32_vtestzpd (v2df,v2df,ptest) +int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest) +int __builtin_ia32_vtestzps (v4sf,v4sf,ptest) +int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest) +void __builtin_ia32_vzeroall (void) +void __builtin_ia32_vzeroupper (void) +v4df __builtin_ia32_xorpd256 (v4df,v4df) +v8sf __builtin_ia32_xorps256 (v8sf,v8sf) +@end smallexample + The following built-in functions are available when @option{-maes} is used. All of them generate the machine instruction that is part of the name. 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1280e4928b1..2876d54ac11 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -559,7 +559,7 @@ Objective-C and Objective-C++ Dialects}. -mpreferred-stack-boundary=@var{num} -mincoming-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol --mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol +-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -maes -mpclmul @gol -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol -mthreads -mno-align-stringops -minline-all-stringops @gol @@ -10798,6 +10798,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-sse4.2 @itemx -msse4 @itemx -mno-sse4 +@itemx -mavx +@itemx -mno-avx @itemx -maes @itemx -mno-aes @itemx -mpclmul @@ -10819,7 +10821,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or +SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or 3DNow!@: extended instruction sets. These extensions are also available as built-in functions: see @ref{X86 Built-in Functions}, for details of the functions enabled and @@ -10828,6 +10830,10 @@ disabled by these switches. To have SSE/SSE2 instructions generated automatically from floating-point code (as opposed to 387 instructions), see @option{-mfpmath=sse}. +GCC suppresses SSEx instructions when @option{-mavx} is used. Instead, it +generates new AVX instructions or AVX equivalents for all SSEx instructions +when needed. + These options will enable GCC to use these extended instructions in generated code, even without @option{-mfpmath=sse}. Applications which perform runtime CPU detection must compile separate files for each -- cgit v1.2.3