aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joey Ye <joey.ye@intel.com> 2008-08-21 03:01:00 +0000
committer Joey Ye <joey.ye@intel.com> 2008-08-21 03:01:00 +0000
commit 4943ddd03a9baf2d2c73eb5b84a1b0f1a42d3242 (patch)
tree 3ceb9998cd61cae1f74db320a7c86754cb595d04
parent 1eacf76f715878ba0653ed13eecb9abe862f4f44 (diff)
* doc/extend.texi: Document AVX built-in function.
* doc/invoke.texi: Document -mavx.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ix86/avx@139371 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog.avx 5
-rw-r--r-- gcc/doc/extend.texi 135
-rw-r--r-- gcc/doc/invoke.texi 10
3 files changed, 148 insertions, 2 deletions
diff --git a/gcc/ChangeLog.avx b/gcc/ChangeLog.avx
index ff3732717a9..7dab0dbbc08 100644
--- a/gcc/ChangeLog.avx
+++ b/gcc/ChangeLog.avx
@@ -1,3 +1,8 @@
+2008-08-20 Joey Ye <joey.ye@intel.com>
+
+ * doc/extend.texi: Document AVX built-in function.
+ * doc/invoke.texi: Document -mavx.
+
2008-08-20 Xuepeng Guo <xuepeng.guo@intel.com>
* config/i386/sse.md (*avx_aesenc): New.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1f8cbd3031c..760c1288293 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -8271,6 +8271,141 @@ depending on the size of @code{unsigned long}.
Generates the @code{popcntq} machine instruction.
@end table
+The following built-in functions are available when @option{-mavx} is
+used. All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+v4df __builtin_ia32_addpd256 (v4df,v4df)
+v8sf __builtin_ia32_addps256 (v8sf,v8sf)
+v4df __builtin_ia32_addsubpd256 (v4df,v4df)
+v8sf __builtin_ia32_addsubps256 (v8sf,v8sf)
+v4df __builtin_ia32_andnpd256 (v4df,v4df)
+v8sf __builtin_ia32_andnps256 (v8sf,v8sf)
+v4df __builtin_ia32_andpd256 (v4df,v4df)
+v8sf __builtin_ia32_andps256 (v8sf,v8sf)
+v4df __builtin_ia32_blendpd256 (v4df,v4df,int)
+v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int)
+v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df)
+v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf)
+v2df __builtin_ia32_cmppd (v2df,v2df,int)
+v4df __builtin_ia32_cmppd256 (v4df,v4df,int)
+v4sf __builtin_ia32_cmpps (v4sf,v4sf,int)
+v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int)
+v2df __builtin_ia32_cmpsd (v2df,v2df,int)
+v4sf __builtin_ia32_cmpss (v4sf,v4sf,int)
+v4df __builtin_ia32_cvtdq2pd256 (v4si)
+v8sf __builtin_ia32_cvtdq2ps256 (v8si)
+v4si __builtin_ia32_cvtpd2dq256 (v4df)
+v4sf __builtin_ia32_cvtpd2ps256 (v4df)
+v8si __builtin_ia32_cvtps2dq256 (v8sf)
+v4df __builtin_ia32_cvtps2pd256 (v4sf)
+v4si __builtin_ia32_cvttpd2dq256 (v4df)
+v8si __builtin_ia32_cvttps2dq256 (v8sf)
+v4df __builtin_ia32_divpd256 (v4df,v4df)
+v8sf __builtin_ia32_divps256 (v8sf,v8sf)
+v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int)
+v4df __builtin_ia32_haddpd256 (v4df,v4df)
+v8sf __builtin_ia32_haddps256 (v8sf,v8sf)
+v4df __builtin_ia32_hsubpd256 (v4df,v4df)
+v8sf __builtin_ia32_hsubps256 (v8sf,v8sf)
+v32qi __builtin_ia32_lddqu256 (pcchar)
+v32qi __builtin_ia32_loaddqu256 (pcchar)
+v4df __builtin_ia32_loadupd256 (pcdouble)
+v8sf __builtin_ia32_loadups256 (pcfloat)
+v2df __builtin_ia32_maskloadpd (pcv2df,v2df)
+v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df)
+v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf)
+v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf)
+void __builtin_ia32_maskstorepd (pv2df,v2df,v2df)
+void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df)
+void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf)
+void __builtin_ia32_maskstoreps256 (pv8sf,v8sf,v8sf)
+v4df __builtin_ia32_maxpd256 (v4df,v4df)
+v8sf __builtin_ia32_maxps256 (v8sf,v8sf)
+v4df __builtin_ia32_minpd256 (v4df,v4df)
+v8sf __builtin_ia32_minps256 (v8sf,v8sf)
+v4df __builtin_ia32_movddup256 (v4df)
+int __builtin_ia32_movmskpd256 (v4df)
+int __builtin_ia32_movmskps256 (v8sf)
+v8sf __builtin_ia32_movshdup256 (v8sf)
+v8sf __builtin_ia32_movsldup256 (v8sf)
+v4df __builtin_ia32_mulpd256 (v4df,v4df)
+v8sf __builtin_ia32_mulps256 (v8sf,v8sf)
+v4df __builtin_ia32_orpd256 (v4df,v4df)
+v8sf __builtin_ia32_orps256 (v8sf,v8sf)
+v2df __builtin_ia32_pd_pd256 (v4df)
+v4df __builtin_ia32_pd256_pd (v2df)
+v4sf __builtin_ia32_ps_ps256 (v8sf)
+v8sf __builtin_ia32_ps256_ps (v4sf)
+int __builtin_ia32_ptestc256 (v4di,v4di,ptest)
+int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest)
+int __builtin_ia32_ptestz256 (v4di,v4di,ptest)
+v8sf __builtin_ia32_rcpps256 (v8sf)
+v4df __builtin_ia32_roundpd256 (v4df,int)
+v8sf __builtin_ia32_roundps256 (v8sf,int)
+v8sf __builtin_ia32_rsqrtps_nr256 (v8sf)
+v8sf __builtin_ia32_rsqrtps256 (v8sf)
+v4df __builtin_ia32_shufpd256 (v4df,v4df,int)
+v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int)
+v4si __builtin_ia32_si_si256 (v8si)
+v8si __builtin_ia32_si256_si (v4si)
+v4df __builtin_ia32_sqrtpd256 (v4df)
+v8sf __builtin_ia32_sqrtps_nr256 (v8sf)
+v8sf __builtin_ia32_sqrtps256 (v8sf)
+void __builtin_ia32_storedqu256 (pchar,v32qi)
+void __builtin_ia32_storeupd256 (pdouble,v4df)
+void __builtin_ia32_storeups256 (pfloat,v8sf)
+v4df __builtin_ia32_subpd256 (v4df,v4df)
+v8sf __builtin_ia32_subps256 (v8sf,v8sf)
+v4df __builtin_ia32_unpckhpd256 (v4df,v4df)
+v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf)
+v4df __builtin_ia32_unpcklpd256 (v4df,v4df)
+v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf)
+v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df)
+v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf)
+v4df __builtin_ia32_vbroadcastsd256 (pcdouble)
+v4sf __builtin_ia32_vbroadcastss (pcfloat)
+v8sf __builtin_ia32_vbroadcastss256 (pcfloat)
+v2df __builtin_ia32_vextractf128_pd256 (v4df,int)
+v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int)
+v4si __builtin_ia32_vextractf128_si256 (v8si,int)
+v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int)
+v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int)
+v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int)
+v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int)
+v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int)
+v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int)
+v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int)
+v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int)
+v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int)
+v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int)
+v2df __builtin_ia32_vpermilpd (v2df,int)
+v4df __builtin_ia32_vpermilpd256 (v4df,int)
+v4sf __builtin_ia32_vpermilps (v4sf,int)
+v8sf __builtin_ia32_vpermilps256 (v8sf,int)
+v2df __builtin_ia32_vpermilvarpd (v2df,v2di)
+v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di)
+v4sf __builtin_ia32_vpermilvarps (v4sf,v4si)
+v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si)
+int __builtin_ia32_vtestcpd (v2df,v2df,ptest)
+int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest)
+int __builtin_ia32_vtestcps (v4sf,v4sf,ptest)
+int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest)
+int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest)
+int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest)
+int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest)
+int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest)
+int __builtin_ia32_vtestzpd (v2df,v2df,ptest)
+int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest)
+int __builtin_ia32_vtestzps (v4sf,v4sf,ptest)
+int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest)
+void __builtin_ia32_vzeroall (void)
+void __builtin_ia32_vzeroupper (void)
+v4df __builtin_ia32_xorpd256 (v4df,v4df)
+v8sf __builtin_ia32_xorps256 (v8sf,v8sf)
+@end smallexample
+
The following built-in functions are available when @option{-maes} is
used. All of them generate the machine instruction that is part of the
name.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1280e4928b1..2876d54ac11 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -559,7 +559,7 @@ Objective-C and Objective-C++ Dialects}.
-mpreferred-stack-boundary=@var{num}
-mincoming-stack-boundary=@var{num}
-mcld -mcx16 -msahf -mrecip @gol
--mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
+-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-maes -mpclmul @gol
-msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol
@@ -10798,6 +10798,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-sse4.2
@itemx -msse4
@itemx -mno-sse4
+@itemx -mavx
+@itemx -mno-avx
@itemx -maes
@itemx -mno-aes
@itemx -mpclmul
@@ -10819,7 +10821,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow
@opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX,
-SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
+SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, SSE5, ABM or
3DNow!@: extended instruction sets.
These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and
@@ -10828,6 +10830,10 @@ disabled by these switches.
To have SSE/SSE2 instructions generated automatically from floating-point
code (as opposed to 387 instructions), see @option{-mfpmath=sse}.
+GCC suppresses SSEx instructions when @option{-mavx} is used. Instead, it
+generates new AVX instructions or AVX equivalents for all SSEx instructions
+when needed.
+
These options will enable GCC to use these extended instructions in
generated code, even without @option{-mfpmath=sse}. Applications which
perform runtime CPU detection must compile separate files for each