aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author(no author) <(no author)@138bc75d-0d04-0410-961f-82ee72b054a4>2002-11-13 11:19:59 +0000
committer(no author) <(no author)@138bc75d-0d04-0410-961f-82ee72b054a4>2002-11-13 11:19:59 +0000
commitde824608e40e2fe2b72a0709bdaedecfab8f093e (patch)
treeda645c3b88649cb0e97e0abf63c3e387ba199537
parentbb21973da28b9dcaf5b0a2ea14ec715c498297a3 (diff)
This commit was manufactured by cvs2svn to create taghammer-3_3-merge-20021211
'hammer-3_3-merge-20021211'. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/tags/hammer-3_3-merge-20021211@59074 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--config/mpw-mh-mpw157
-rw-r--r--config/mpw/ChangeLog53
-rw-r--r--config/mpw/MoveIfChange19
-rw-r--r--config/mpw/README23
-rw-r--r--config/mpw/forward-include3
-rw-r--r--config/mpw/g-mpw-make.sed293
-rw-r--r--config/mpw/mpw-touch7
-rw-r--r--config/mpw/mpw-true1
-rw-r--r--config/mpw/null-command1
-rw-r--r--config/mpw/open-brace4
-rw-r--r--config/mpw/tr-7to8-src9
-rw-r--r--config/mpw/true1
-rw-r--r--gcc/ChangeLog.hammer109
-rw-r--r--gcc/config/i386/athlon.md573
-rw-r--r--gcc/config/i386/i386.c117
-rw-r--r--gcc/config/i386/i386.h10
-rw-r--r--gcc/config/i386/i386.md821
-rw-r--r--gcc/config/i386/scodbx.h84
-rw-r--r--gcc/config/i386/t-darwin11
-rw-r--r--gcc/config/i386/xm-dgux.h4
-rw-r--r--gcc/config/i386/xm-sun.h21
-rw-r--r--gcc/config/i386/xm-sysv3.h3
-rw-r--r--gcc/testsuite/g++.old-deja/g++.robertl/eb42.C19
23 files changed, 1885 insertions, 458 deletions
diff --git a/config/mpw-mh-mpw b/config/mpw-mh-mpw
new file mode 100644
index 00000000000..543ef4fb2a1
--- /dev/null
+++ b/config/mpw-mh-mpw
@@ -0,0 +1,157 @@
+# This is an MPW makefile fragment.
+
+# Since there are a multiplicity of Mac compilers and two different
+# processors, this file is primarily a library of options for each
+# compiler. Somebody else (such as a configure or build script) will
+# make the actual choice.
+
+# Compiler to use for compiling.
+
+CC_MPW_C = C -d MPW_C -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -mc68020 -model far -b -w
+
+CC_SC = SC -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -mc68020 -model far -b -i '' -i :
+
+CC_MWC68K = MWC68K -d MPW -enum int -mpw_chars -sym on -w off -mc68020 -model far
+
+CC_PPCC = PPCC -d powerc=1 -d pascal= -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -w
+
+CC_MRC = MrC -d powerc=1 -d pascal= -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -i '' -i : -jm
+
+CC_SMrC = SMrC -d MPW
+
+# "-mpw_chars" is necessary because GNU sources often mix signed and
+# unsigned casually.
+# "-w off" is not a great idea, but CW7 is complaining about enum
+# assignments.
+# "-opt global,peep,l4,speed" is sometimes good, and sometimes bad.
+# We must use {CIncludes} so that MPW tools will work; {MWCIncludes}
+# defines stdout, islower, etc, in ways that are incompatible with MPW's
+# runtime. However, this cannot be done via -i "{CIncludes}", since
+# that does not affect how <>-type includes happen; instead, the variable
+# MWCIncludes must be set to point at {CIncludes}.
+
+CC_MWCPPC = MWCPPC -d MPW -enum int -mpw_chars -sym on -w off
+
+# Note that GCC does *not* wire in a definition of "pascal", so that
+# it can be handled in another way if desired.
+
+CC_68K_GCC = gC -Dpascal= -DANSI_PROTOTYPES -DMPW
+
+CC_PPC_GCC = gC -Dpowerc=1 -Dpascal= -DANSI_PROTOTYPES -DMPW
+
+# Nothing for the default CFLAGS.
+
+CFLAGS =
+
+# Tool to use for making libraries/archives.
+
+AR_LIB = Lib
+
+AR_MWLINK68K = MWLink68K -xm library
+
+AR_PPCLINK = PPCLink -xm library
+
+AR_MWLINKPPC = MWLinkPPC -xm library
+
+AR_AR = ar
+
+AR_FLAGS = -o
+
+RANLIB_NULL = null-command
+
+RANLIB_RANLIB = ranlib
+
+# Compiler and/or linker to use for linking.
+
+CC_LD_LINK = Link -w -d -model far {CC_LD_TOOL_FLAGS}
+
+CC_LD_MWLINK68K = MWLink68K -w {CC_LD_TOOL_FLAGS} -sym on -model far
+
+CC_LD_PPCLINK = PPCLink -main __start -outputformat xcoff
+
+CC_LD_MWLINKPPC = MWLinkPPC -w {CC_LD_TOOL_FLAGS} -sym on
+
+CC_LD_GLD = gC
+
+# Extension for linker output.
+
+PROG_EXT_68K =
+
+PROG_EXT_XCOFF = .xcoff
+
+# Nothing for the default LDFLAGS.
+
+LDFLAGS = -w
+
+CC_LD_TOOL_FLAGS = -c 'MPS ' -t MPST
+
+# Libraries to link against.
+
+# It would appear that the math libraries are not
+# needed except to provide a definition for scalb,
+# which is called from ldexp, which is referenced
+# in the m68k opcodes library.
+
+EXTRALIBS_C = \Option-d
+ "{CLibraries}"StdClib.o \Option-d
+ "{CLibraries}"Math.o \Option-d
+ "{CLibraries}"CSANELib.o \Option-d
+ "{Libraries}"Stubs.o \Option-d
+ "{Libraries}"Runtime.o \Option-d
+ "{Libraries}"Interface.o \Option-d
+ "{Libraries}"ToolLibs.o
+
+EXTRALIBS_MWC68K = \Option-d
+ "{CLibraries}"StdClib.o \Option-d
+ "{CLibraries}"Math.o \Option-d
+ "{CLibraries}"CSANELib.o \Option-d
+ "{Libraries}"Stubs.o \Option-d
+ "{Libraries}"Runtime.o \Option-d
+ "{Libraries}"Interface.o \Option-d
+ "{Libraries}"ToolLibs.o \Option-d
+ "{MW68KLibraries}MPW ANSI (4i) C.68K.Lib"
+
+EXTRALIBS_PPC_XCOFF = \Option-d
+ "{PPCLibraries}"StdCRuntime.o \Option-d
+ "{PPCLibraries}"InterfaceLib.xcoff \Option-d
+ "{PPCLibraries}"MathLib.xcoff \Option-d
+ "{PPCLibraries}"StdCLib.xcoff \Option-d
+ "{PPCLibraries}"PPCToolLibs.o \Option-d
+ "{PPCLibraries}"PPCCRuntime.o \Option-d
+ "{GCCPPCLibraries}"libgcc.xcoff
+
+EXTRALIBS_PPC = \Option-d
+ "{PPCLibraries}"StdCRuntime.o \Option-d
+ "{SharedLibraries}"InterfaceLib \Option-d
+ "{SharedLibraries}"MathLib \Option-d
+ "{SharedLibraries}"StdCLib \Option-d
+ "{PPCLibraries}"PPCToolLibs.o \Option-d
+ "{PPCLibraries}"PPCCRuntime.o \Option-d
+ "{GCCPPCLibraries}"libgcc.xcoff
+
+EXTRALIBS_MWCPPC = \Option-d
+ "{MWPPCLibraries}"MWStdCRuntime.Lib \Option-d
+ "{MWPPCLibraries}"InterfaceLib \Option-d
+ "{MWPPCLibraries}"StdCLib \Option-d
+ "{MWPPCLibraries}"MathLib \Option-d
+ "{MWPPCLibraries}"PPCToolLibs.o
+
+# Tool to make PEF with, if needed.
+
+MAKEPEF_NULL = null-command
+
+MAKEPEF_PPC = MakePEF
+
+MAKEPEF_FLAGS = \Option-d
+ -l InterfaceLib.xcoff=InterfaceLib \Option-d
+ -l MathLib.xcoff=MathLib \Option-d
+ -l StdCLib.xcoff=StdCLib
+
+MAKEPEF_TOOL_FLAGS = -ft MPST -fc 'MPS '
+
+# Resource compiler to use.
+
+REZ_68K = Rez
+
+REZ_PPC = Rez -d WANT_CFRG
+
diff --git a/config/mpw/ChangeLog b/config/mpw/ChangeLog
new file mode 100644
index 00000000000..3cdefbf7a75
--- /dev/null
+++ b/config/mpw/ChangeLog
@@ -0,0 +1,53 @@
+Tue Nov 26 12:34:12 1996 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed: Fix some comments.
+
+Mon Sep 16 14:42:52 1996 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed (HLDENV): Edit out all references.
+
+Thu Aug 15 19:49:23 1996 Stan Shebs <shebs@andros.cygnus.com>
+
+ * true: New script, identical to mpw-true.
+ * g-mpw-make.sed: Add @DASH_C_FLAG@ and @SEGMENT_FLAG()@
+ to the editors for compile commands.
+
+Thu Aug 1 15:01:42 1996 Stan Shebs <shebs@andros.cygnus.com>
+
+ * mpw-true, mpw-touch, null-command: New scripts.
+ * README: Describe usage in more detail.
+
+Tue Dec 12 14:51:51 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed: Don't edit out "version=" occurrences.
+
+Fri Dec 1 11:46:18 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed (bindir, libdir): Edit the positions of
+ pathname separators to work with other pathnames better.
+
+Tue Nov 7 15:08:07 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed: Add comment about Duplicate vs Catenate,
+ add additional pattern for editing link-compile commands.
+
+Tue Oct 24 14:28:51 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed: Add handling for *.tab.[hc] files.
+ (CHILL_FOR_TARGET, CHILL_LIB): Edit out tricky definitions
+ of these.
+
+Thu Sep 28 21:05:10 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * g-mpw-make.sed: New file, generic sed commands to translate
+ Unix makefiles into MPW makefile syntax.
+
+Fri Mar 17 11:51:20 1995 Stan Shebs <shebs@andros.cygnus.com>
+
+ * README: Clarify instructions.
+ * fi: Remove.
+
+Wed Dec 21 15:45:53 1994 Stan Shebs <shebs@andros.cygnus.com>
+
+ * MoveIfChange, README, fi, forward-include, open-brace,
+ tr-7to8-src: New files.
diff --git a/config/mpw/MoveIfChange b/config/mpw/MoveIfChange
new file mode 100644
index 00000000000..0dbc12582f5
--- /dev/null
+++ b/config/mpw/MoveIfChange
@@ -0,0 +1,19 @@
+# Rename a file only if it is different from a previously existing
+# file of the same name. This is useful for keeping make from doing
+# too much work if the contents of a file haven't changed.
+
+# This is an MPW translation of the standard GNU sh script move-if-change.
+
+Set exit 0
+
+If "`exists -f "{2}"`"
+ Compare "{1}" "{2}" >dev:null
+ If {status} != 0
+ Rename -y "{1}" "{2}"
+ Else
+ Echo "{2}" is unchanged
+ Delete -i -y "{1}"
+ End
+Else
+ Rename -y "{1}" "{2}"
+End
diff --git a/config/mpw/README b/config/mpw/README
new file mode 100644
index 00000000000..554700adc81
--- /dev/null
+++ b/config/mpw/README
@@ -0,0 +1,23 @@
+This directory contains MPW scripts and related files that are needed to
+build Cygnus GNU tools for MPW. The scripts should be somewhere on the
+command path; our usual practice has been to have a separate directory
+for the scripts, and put the tools (byacc, flex, and sed at least) there
+also; then it's easier to drag the support bits around as a group, or to
+upgrade MPW versions. The complete package of scripts and tool binaries
+is usually available as pub/mac/buildtools.cpt.hqx on ftp.cygnus.com.
+
+"tr-7to8-src" is actually the source to an MPW script that transforms
+sequences like "\Option-d" into the actual 8-bit chars that MPW needs.
+It's only the source because it can't itself include any 8-bit chars.
+It *can* be processed into a genuine "tr-7to8" by using itself:
+
+ tr-7to8 tr-7to8-src | sed -e 's/Src//' >new-tr-7to8
+
+Use this to verify:
+
+ compare tr-7to8 new-tr-7to8
+
+If you don't have a working tr-7to8, then you will have to manually
+replace all occurrences of "\Option-d" with real Option-d (which looks
+like a delta), then do similarly with all the other "\Option-..."
+strings, and then change "\SrcOption-d" into the string "\Option-d".
diff --git a/config/mpw/forward-include b/config/mpw/forward-include
new file mode 100644
index 00000000000..ddd6bd71105
--- /dev/null
+++ b/config/mpw/forward-include
@@ -0,0 +1,3 @@
+Echo '#include' ¶""{1}"¶" >"{2}".tem
+MoveIfChange "{2}".tem "{2}"
+
diff --git a/config/mpw/g-mpw-make.sed b/config/mpw/g-mpw-make.sed
new file mode 100644
index 00000000000..e7d3c770736
--- /dev/null
+++ b/config/mpw/g-mpw-make.sed
@@ -0,0 +1,293 @@
+# Sed commands to translate Unix makefiles into MPW makefiles.
+# These are nominally generic, but work best on the makefiles used
+# for GNU programs.
+
+# Whack out any commented-out lines that are probably commands;
+# they can only cause trouble later on.
+/^# /d
+
+# Change dependency char.
+/:$/s/:/ \\Option-f/g
+/^[^ :#][^:]*:/s/\([ ]*\):\([ ]*\)/ \\Option-f /g
+
+# Change syntax of Makefile vars.
+/\$/s/\${\([a-zA-Z0-9_-]*\)}/{\1}/g
+/\$/s/\$(\([a-zA-Z0-9_-]*\))/{\1}/g
+/ $@/s/ $@/ {Targ}/
+
+# Double-$ are literals to Unix but not to MPW make.
+/\$\$/s/\$\$/$/g
+
+# Change pathname syntax.
+/\//s,\.\./\/\.\./,:::,g
+/\//s,\.\./,::,g
+/\.\//s,\./,:,g
+/\//s,/,:,g
+# Undo excess changes.
+/and/s,and:or$,and/or,
+/and/s,and:or ,and/or ,
+/want/s,want:need,want/need,
+# Fixing up sed commands.
+/-e/s_":\([^:]*\):d"_"/\1/d"_g
+/-e/s_":\([^:]*\):,:\([^:]*\):d"_"/\1/,/\2/d"_g
+
+/=/s/ = \.$/ = :/
+
+# Make these go away so that later edits not confused.
+/HLDENV/s/{HLDENV}//
+
+# Comment out any explicit srcdir setting.
+/srcdir/s/^srcdir/# srcdir/
+
+/BASEDIR/s/^BASEDIR =.*$/BASEDIR = "{srcroot}"/
+/{BASEDIR}:/s/{BASEDIR}:/{BASEDIR}/g
+/{srcdir}:/s/{srcdir}:/"{srcdir}"/g
+/"{srcdir}":/s/"{srcdir}":/"{srcdir}"/g
+
+# Tweak some conventions that are backwards for the Mac.
+/bindir/s/{exec_prefix}:bin/{exec_prefix}bin:/
+/libdir/s/{exec_prefix}:lib/{exec_prefix}lib:/
+
+# Comment out settings of anything set by mpw host config.
+/CC/s/^CC *=/#CC =/
+/CFLAGS/s/^CFLAGS *=/#CFLAGS =/
+/AR/s/^AR *=/#AR =/
+/AR_FLAGS/s/^AR_FLAGS *=/#AR_FLAGS =/
+/RANLIB/s/^RANLIB *=/#RANLIB =/
+/CC_LD/s/^CC_LD *=/#CC_LD =/
+/LDFLAGS/s/^LDFLAGS *=/#LDFLAGS =/
+
+# Change -I usages.
+/-I/s/-I\./-i :/g
+/-I/s/-I::bfd/-i ::bfd:/g
+/-I/s/-I::include/-i ::include:/g
+/-I/s/-I/-i /g
+
+# Change -D usage.
+/-D/s/\([ =]\)-D\([^ ]*\)/\1-d \2/g
+
+# Change continuation char.
+/\\$/s/\\$/\\Option-d/
+
+# Change wildcard char.
+/\*/s/\*/\\Option-x/g
+
+# Change path of various types of source files. This rule does not allow
+# for file names with multiple dots in the name.
+/\.[chly]/s/\([ ><=]\)\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/\1"{s}"\2.\3/g
+/\.[chly]/s/^\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/"{s}"\1.\2/
+# Allow files named *.tab.[ch] as a special case.
+/\.tab\.[ch]/s/\([ ><=]\)\([-a-zA-Z0-9_${}:"]*\.tab\)\.\([ch]\)/\1"{s}"\2.\3/g
+/\.tab\.[ch]/s/^\([-a-zA-Z0-9_${}:"]*\.tab\)\.\([ch]\)/"{s}"\1.\2/
+# Fix some overenthusiasms.
+/{s}/s/"{s}""{srcdir}"/"{srcdir}"/g
+/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)dir}/"{\1dir}"/g
+/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)DIR}/"{\1DIR}"/g
+/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)dir}"/"{\1dir}"/g
+/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)DIR}"/"{\1DIR}"/g
+/{s}/s/"{s}":/:/g
+/{s}/s/^"{s}"//g
+/{s}/s/"{s}""{s}"/"{s}"/g
+/{s}/s/"{s}""{srcdir}"/"{s}"/g
+/{s}/s/"{srcdir}""{s}"/"{s}"/g
+
+# The .def files are also typically source files.
+/\.def/s/\([ ><]\)\([-a-zA-Z0-9_${}:"]*\)\.def/\1"{s}"\2.def/g
+/\.def/s/^\([-a-zA-Z0-9_${}:"]*\)\.def/"{s}"\1.def/g
+
+# Change extension and path of objects.
+/\.o/s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1"{o}"\2.c.o/g
+/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/"{o}"\1.c.o/
+# Allow *.tab.o files as a special case of a 2-dot-name file.
+/\.o/s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.tab\.o/\1"{o}"\2.tab.c.o/g
+/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.tab\.o/"{o}"\1.tab.c.o/
+# Clean up.
+/"{o}"/s/"{o}""{o}"/"{o}"/g
+/"{o}"/s/^"{o}"\([a-zA-Z0-9_]*\)=/\1=/
+
+# Change extension of libs.
+/\.a/s/lib\([a-z]*\)\.a/lib\1.o/g
+
+# Remove non-fail option.
+/-/s/^\([ ]*\)-/\1/
+# Fix overeagernesses - assumes no one-letter commands.
+/^[ ]*[a-z] /s/^\([ ]*\)\([a-z]\) /\1-\2 /
+
+# Remove non-echo option. (watch out for autoconf things)
+/@/s/^\([ ]*\)@/\1/
+
+# Change cp to Duplicate.
+# Catenate is perhaps more accurate, but the pattern would have to
+# identify the output file and add a '>' redirection into it.
+/cp/s/^\([ ]*\)cp /\1Duplicate -d -y /
+# Change mv to Rename.
+/mv/s/^\([ ]*\)mv /\1Rename -y /
+/Rename/s/^\([ ]*\)Rename -y -f/\1Rename -y/
+# Change rm to Delete.
+/rm -rf/s/^\([ ]*\)rm -rf /\1Delete -i -y /
+/rm -f/s/^\([ ]*\)rm -f /\1Delete -i -y /
+/rm/s/^\([ ]*\)rm /\1Delete -i -y /
+# Note that we don't mess with ln - directory-specific scripts
+# must decide what to do with symlinks.
+# Change cat to Catenate.
+/cat/s/^\([ ]*\)cat /\1Catenate /
+# Change touch to mpw-touch.
+/touch/s/^\([ ]*\)touch /\1mpw-touch /
+# Change mkdir to NewFolder.
+/mkdir/s/^\([ ]*\)mkdir /\1NewFolder /
+# Change var setting to Set.
+/=/s/^\([ ]*\)\([-a-zA-Z0-9_]*\)=\([^;]*\); \\Option-d/\1Set \2 \3/
+
+# Change tests.
+/if /s/if \[ *-f \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" != ""/
+/if /s/if \[ *-f \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" != ""/
+/if /s/if \[ ! *-f \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" == ""/
+/if /s/if \[ ! *-f \([^ ]*\) ] *; *then \\Option-d/If "`Exists "\1"`" == ""/
+
+/if /s/if \[ *-d \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" != ""/
+/if /s/if \[ *-d \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" != ""/
+/if /s/if \[ ! *-d \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" == ""/
+/if /s/if \[ ! *-d \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" == ""/
+
+/if /s/if \[ -d \([^ ]*\) ] *; then true *; else mkdir \([^ ;]*\) *; fi/If "`Exists "\1"`" != "" NewFolder \2 End If/
+
+/if /s/if \[ \([^ ]*\) = \([^ ]*\) ] *; *\\Option-d/If "\1" == "\2"/
+/if /s/if \[ \([^ ]*\) = \([^ ]*\) ] *; *then *\\Option-d/If "\1" == "\2"/
+
+/if /s/if \[ \([^ ]*\) != \([^ ]*\) ] *; *\\Option-d/If "\1" != "\2"/
+/if /s/if \[ \([^ ]*\) != \([^ ]*\) ] *; *then *\\Option-d/If "\1" != "\2"/
+
+/if /s/if \[ \([^ ]*\) -eq \([^ ]*\) ] *; *\\Option-d/If "\1" != "\2"/
+/if /s/if \[ \([^ ]*\) -eq \([^ ]*\) ] *; *then *\\Option-d/If "\1" != "\2"/
+
+/^[ ]*else true$/c\
+ Else\
+ mpw-true\
+
+
+/else/s/^\([ ]*\)else[ ]*$/\1Else/
+/else/s/^\([ ]*\)else[; ]*\\Option-d$/\1Else/
+
+/^[ ]*else[ ]*true[ ]*$/c\
+ Else\
+ mpw-true
+
+/^[ ]*else[ ]*true[; ]*fi$/c\
+ Else\
+ mpw-true\
+ End If
+
+/fi/s/^\([ ]*\)fi *$/\1End/
+/fi/s/^\([ ]*\)fi *; *\\Option-d/\1End/
+
+# Change looping.
+/for/s/^\([ ]*\)for \([-a-zA-Z0-9_]*\) in \([^;]*\); *do *\\Option-d/\1For \2 In \3/
+/^\([ ]*\)do *\\Option-d/d
+/done/s/^\([ ]*\)done *; *\\Option-d/\1End/
+/done/s/^\([ ]*\)done$/\1End/
+
+# Trailing semicolons and continued lines are unneeded sh syntax.
+/; \\Option-d/s/; \\Option-d//
+
+# Change move-if-change to MoveIfChange.
+/move-if-change/s/\([^ ]*\)move-if-change/MoveIfChange/g
+
+# Change $(SHELL) to the script name by itself.
+/SHELL/s/^\([ ]*\){SHELL} /\1/
+
+# Change syntax of default rule dependency.
+/^\.c\.o/s/^\.c\.o \\Option-f$/.c.o \\Option-f .c/
+
+# Change default rule's action.
+/{CC} -c/s/{CC} -c \(.*\) \$<$/{CC} @DASH_C_FLAG@ {DepDir}{Default}.c \1 @SEGMENT_FLAG({Default})@ -o {TargDir}{Default}.c.o/
+
+# This is pretty disgusting, but I can't seem to detect empty rules.
+/Option-f$/s/Option-f$/Option-f _oldest/g
+
+# Remove -c from explicit compiler calls. (but should not if GCC)
+# Handle the case of a source file that is "{xxx}"file.c.
+/ -c /s/{\([A-Z_]*\)CC}\(.*\) -c \(.*\)"\([^"]*\)"\([-a-z_]*\)\.c/{\1CC}\2 @DASH_C_FLAG@ \3"\4"\5.c -o "{o}"\5.c.o/
+# Handle the case of a source file that is "{xxx}"dir:file.c.
+/ -c /s/{\([A-Z_]*\)CC}\(.*\) -c \(.*\)"\([^"]*\)"\([-a-z_]*\):\([-a-z_]*\)\.c/{\1CC}\2 @DASH_C_FLAG@ \3"\4"\5:\6.c -o "{o}"\6.c.o/
+
+# Change linking cc to linking sequence.
+/-o/s/^\([ ]*\){CC} \(.*\){\([A-Z_]*\)CFLAGS} \(.*\){LDFLAGS} \(.*\)-o \([^ ]*\) \(.*\)$/\1{CC_LD} \2 {\3CFLAGS} \4 {LDFLAGS} \5 -o \6{PROG_EXT} \7\
+\1{MAKEPEF} \6{PROG_EXT} -o \6 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\
+\1{REZ} "{s}"\6.r -o \6 -append -d PROG_NAME='"'\6'"' -d VERSION_STRING='"'{version}'"'/
+/-o/s/^\([ ]*\){CC} \(.*\){\([A-Z_]*\)CFLAGS} \(.*\)-o \([^ ]*\) \(.*\){LDFLAGS} \(.*\)$/\1{CC_LD} \2 {\3CFLAGS} \4 {LDFLAGS} \6 -o \5{PROG_EXT} \7\
+\1{MAKEPEF} \5{PROG_EXT} -o \5 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\
+\1{REZ} "{s}"\5.r -o \5 -append -d PROG_NAME='"'\5'"' -d VERSION_STRING='"'{version}'"'/
+/-o/s/^\([ ]*\){HOST_CC} \(.*\)-o \([^ ]*\) \(.*\)$/\1{HOST_CC_LD} \2 -o \3{PROG_EXT} \4\
+\1{MAKEPEF} \3{PROG_EXT} -o \3 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\
+\1{REZ} "{s}"\3.r -o \3 -append -d PROG_NAME='"'\3'"' -d VERSION_STRING='"'{version}'"'/
+
+# Comment out .NOEXPORT rules.
+/\.NOEXPORT/s/^\.NOEXPORT/#\.NOEXPORT/
+# Comment out .PHONY rules.
+/\.PHONY/s/^\.PHONY/#\.PHONY/
+# Comment out .PRECIOUS rules.
+/\.PRECIOUS/s/^\.PRECIOUS/#\.PRECIOUS/
+# Comment out .SUFFIXES rules.
+/\.SUFFIXES/s/^\.SUFFIXES/#\.SUFFIXES/
+
+# Set the install program appropriately.
+/INSTALL/s/^INSTALL *= *`.*`:install.sh -c/INSTALL = Duplicate -y/
+
+# Don't try to decide whether to use the tree's own tools.
+/bison/s/`.*bison:bison.*`/bison -y/
+/byacc/s/`.*byacc:byacc.*`/byacc/
+/flex/s/`.*flex:flex.*`/flex/
+
+# Turn transformed C comments in echo commands back into comments.
+/echo/s,echo '\(.*\):\\Option-x\(.*\)\\Option-x:\(.*\)',echo '\1/*\2*/\3',
+
+# Whack out various clever expressions that search for tools, since
+# the clever code is too /bin/sh specific.
+
+/^AR_FOR_TARGET = `/,/`$/c\
+AR_FOR_TARGET = ::binutils:ar\
+
+
+/^RANLIB_FOR_TARGET = `/,/`$/c\
+RANLIB_FOR_TARGET = ::binutils:ranlib\
+
+
+/^RANLIB_TEST_FOR_TARGET = /,/ranlib ] )$/c\
+RANLIB_TEST_FOR_TARGET = \
+
+
+/^EXPECT = `/,/`$/c\
+EXPECT = \
+
+
+/^RUNTEST = `/,/`$/c\
+RUNTEST = \
+
+
+/^CC_FOR_TARGET = `/,/`$/c\
+CC_FOR_TARGET = \
+
+
+/^CXX_FOR_TARGET = `/,/`$/c\
+CXX_FOR_TARGET = \
+
+
+/^CHILL_FOR_TARGET = `/,/`$/c\
+CHILL_FOR_TARGET = \
+
+
+/^CHILL_LIB = `/,/`$/c\
+CHILL_LIB = \
+
+/sanit/s/{start-sanit...-[a-z0-9]*}//
+/sanit/s/{end-sanit...-[a-z0-9]*}//
+
+# Add standard defines and default rules.
+/^# srcdir/a\
+\
+s = "{srcdir}"\
+\
+o = :\
+\
+"{o}" \\Option-f : "{s}"
+
diff --git a/config/mpw/mpw-touch b/config/mpw/mpw-touch
new file mode 100644
index 00000000000..c743a5122b5
--- /dev/null
+++ b/config/mpw/mpw-touch
@@ -0,0 +1,7 @@
+# "Touch" command.
+
+If "`Exists "{1}"`" != ""
+ SetFile -m . "{1}"
+Else
+ Echo ' ' > "{1}"
+End If
diff --git a/config/mpw/mpw-true b/config/mpw/mpw-true
new file mode 100644
index 00000000000..0506530d3c6
--- /dev/null
+++ b/config/mpw/mpw-true
@@ -0,0 +1 @@
+Exit 0
diff --git a/config/mpw/null-command b/config/mpw/null-command
new file mode 100644
index 00000000000..4844c8ec553
--- /dev/null
+++ b/config/mpw/null-command
@@ -0,0 +1 @@
+# This command does nothing.
diff --git a/config/mpw/open-brace b/config/mpw/open-brace
new file mode 100644
index 00000000000..58465dcc18c
--- /dev/null
+++ b/config/mpw/open-brace
@@ -0,0 +1,4 @@
+# MPW makefiles seem not to have any way to get a literal open
+# brace into a rule anywhere, so this does the job.
+
+Echo '{'
diff --git a/config/mpw/tr-7to8-src b/config/mpw/tr-7to8-src
new file mode 100644
index 00000000000..b20b649c895
--- /dev/null
+++ b/config/mpw/tr-7to8-src
@@ -0,0 +1,9 @@
+StreamEdit -e \Option-d
+ '/\Option-x/ \Option-d
+ Replace /\Option-d\SrcOption-d/ "\Option-d\Option-d" -c \Option-5 ; \Option-d
+ Replace /\Option-d\SrcOption-f/ "\Option-d\Option-f" -c \Option-5 ; \Option-d
+ Replace /\Option-d\SrcOption-8/ "\Option-d\Option-8" -c \Option-5 ; \Option-d
+ Replace /\Option-d\SrcOption-5/ "\Option-d\Option-5" -c \Option-5 ; \Option-d
+ Replace /\Option-d\SrcOption-x/ "\Option-d\Option-x" -c \Option-5 ; \Option-d
+ Replace /\Option-d\SrcOption-r/ "\Option-d\Option-r" -c \Option-5' \Option-d
+ "{1}"
diff --git a/config/mpw/true b/config/mpw/true
new file mode 100644
index 00000000000..0506530d3c6
--- /dev/null
+++ b/config/mpw/true
@@ -0,0 +1 @@
+Exit 0
diff --git a/gcc/ChangeLog.hammer b/gcc/ChangeLog.hammer
new file mode 100644
index 00000000000..49688b838ab
--- /dev/null
+++ b/gcc/ChangeLog.hammer
@@ -0,0 +1,109 @@
+Wed Nov 13 12:08:08 CET 2002 Jan Hubicka <jh@suse.cz>
+ Merge i386 specific optimizations for 3.4-BIB branch.
+
+ Sat Nov 9 00:10:54 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (x86_machine_dependent_reorg): Fix even more side cases.
+
+ Fri Nov 8 13:33:58 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (sse_loadss, sse2_loadsd): Fix expander.
+
+ Fri Nov 8 13:25:41 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (x86_machine_dependent_reorg): Fix handling of empty functions.
+
+ Fri Nov 8 11:36:11 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (sse_movdfcc, sse_movsfcc): Fix typo in previous patch.
+
+ Thu Nov 7 21:54:22 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (sse_movdfcc, sse_movsfcc): Avoid overactive matching.
+ * i386.c (ix86_expand_fp_movcc): Match the reversed cases.
+
+ Tue Nov 5 14:34:36 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (float_truncate SSE splitter): Ensure that operand is not
+ stack register.
+ (float SSE splitters): Reorder conditional.
+
+ Thu Oct 31 18:20:50 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (sse_loadss, sse_loadsd): Canonicalize; add expander
+ (movps, movpd splitters): Use canonical form.
+ (movv2di): Fix merge problem.
+
+ Wed Nov 6 17:16:48 CET 2002 Jan Hubicka <jh@.suse.cz>
+
+ * i386.md (negsf splitter): Accept memory operand in second register.
+ (abssf/absdf splitters): Simplify
+ (sse_loadss, sse_loadsd): Turn into expander.
+
+ Thu Oct 31 16:09:44 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (negdf2_ifs_rex64): Don't allow GPR operand.
+
+ Tue Oct 29 23:28:10 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (negdf splitter): Fix construction of the constant.
+
+ Tue Oct 29 20:47:06 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (negsf, negdf): Reorganize to use vector modes
+ for SSE variants.
+ (abssf, absdf): Use force_reg.
+ (movv4sf, movv2df): New splitters.
+ * i386.h (PREDICATE_CODES): add zero_extended_scalar_load_operand
+ * i386.c (zero_extended_scalar_load_operand
+
+ Wed Oct 23 22:48:44 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (abs splitters): Do not produce nested subregs.
+
+ Wed Oct 23 12:42:32 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (movti_rex64): Fix constraints.
+
+ Wed Oct 23 12:01:21 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.md (abssf,absdf): Use vector operands for SSE
+ (abssf2_ifs, absdf2_ifs, absdf2_ifs_rex64 and splitters): Update for
+ vector operand.
+
+ Wed Oct 9 21:18:43 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (*_cost): Add branch costs.
+ (override_options): set ix86_branch_cost.
+ (ix86_expand_int_movcc): Use BRANCH_COST.
+ * i386.h (costs): Add branch_cost.
+
+ Tue Oct 8 01:24:19 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
+ x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global
+ variables.
+ (safe_vector_operand): Update sse_clrv4sf call.
+ (ix86_expand_buildin): Likewise
+ * i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs,
+ x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare.
+ (TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS,
+ TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New
+ macros.
+ * i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi,
+ movv4si): Obey the new flags.
+ (floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid
+ reformating penalty.
+ (anddf, cmov patterns): Avoid reformating by first converting.
+ (sse_cvtsd2ss): Fix predicate.
+ (sse2_clrti): Fix mode,
+ (sse_clrv4sf): Avoid unspec.
+
+ Sat Oct 5 22:48:06 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * athlon.md: rewrite to DFA.
+ * i386 (ix86_adjust_cost): Drop memory latency code.
+ (ia32_use_dfa_pipeline_interface): Return true for Athlon.
+Thu Nov 7 11:18:01 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * reg-stack.c (compensate_edge): Fix sanity check.
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md
index d6a52f2cbdd..7113cb88345 100644
--- a/gcc/config/i386/athlon.md
+++ b/gcc/config/i386/athlon.md
@@ -1,34 +1,5 @@
;; AMD Athlon Scheduling
-;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
-;; This file is part of GNU CC.
-;;
-;; GNU CC is free software; you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
-;;
-;; GNU CC is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-;;
-;; You should have received a copy of the GNU General Public License
-;; along with GNU CC; see the file COPYING. If not, write to
-;; the Free Software Foundation, 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA. */
-(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
- (const_string "vector")
- (and (eq_attr "type" "push")
- (match_operand 1 "memory_operand" ""))
- (const_string "vector")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF")))
- (const_string "vector")]
- (const_string "direct")))
-
;; The Athlon does contain three pipelined FP units, three integer units and
;; three address generation units.
;;
@@ -46,161 +17,419 @@
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "vector"))
- 1 1)
-
-(define_function_unit "athlon_directdec" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1)
-
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1 [(eq_attr "athlon_decode" "vector")])
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,rotate,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
- 1 1)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "str"))
- 15 15)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 42)
-
-(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
- (cond [(eq_attr "type" "fop,fcmp,fistp")
- (const_string "add")
- (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
- (const_string "mul")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
- (const_string "store")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
- (const_string "any")
+(define_attr "athlon_decode" "direct,vector"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
(and (eq_attr "type" "fmov")
- (ior (match_operand:SI 1 "register_operand" "")
- (match_operand 1 "immediate_operand" "")))
- (const_string "store")
- (eq_attr "type" "fmov")
- (const_string "muladd")]
- (const_string "none")))
-
-;; We use latencies 1 for definitions. This is OK to model colisions
-;; in execution units. The real latencies are modeled in the "fp" pipeline.
-
-;; fsin, fcos: 96-192
-;; fsincos: 107-211
-;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fpspc"))
- 100 1)
-
-;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fdiv"))
- 24 1)
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fop,fmul,fistp"))
- 4 1)
-
-;; XFmode loads are slow.
-;; XFmode store is slow too (8 cycles), but we don't need to model it, because
-;; there are no dependent instructions.
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF")))
+ (const_string "vector")]
+ (const_string "direct")))
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load")
- (eq_attr "mode" "XF"))))
- 10 1)
+;;
+;; decode0 decode1 decode2
+;; \ | /
+;; instruction control unit (72 entry scheduler)
+;; | |
+;; integer scheduler (18) stack map
+;; / | | | | \ stack rename
+;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler
+;; | agu0 | agu1 agu2 register file
+;; | \ | | / | | |
+;; \ /\ | / fadd fmul fstore
+;; \ / \ | / fadd fmul fstore
+;; imul load/store (2x) fadd fmul fstore
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fmov,fsgn"))
- 2 1)
+(define_automaton "athlon,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_reservation "athlon-vector" "(athlon-decode0 + athlon-decode1
+ + athlon-decode2)")
+(define_reservation "athlon-direct" "(athlon-decode0 | athlon-decode1
+ | athlon-decode2)")
-;; fcmp and ftst instructions
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "direct")))
- 3 1)
+;; Agu and ieu unit results in extremly large automatons and
+;; in our approximation they are hardly filled in. Only ieu
+;; unit can, as issue rate is 3 and agu unit is always used
+;; first in the insn reservations. Skip the models.
-;; fcmpi instructions.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "vector")))
- 3 1)
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing,nothing")
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fcmov"))
- 7 1)
+(define_cpu_unit "athlon-mult" "athlon_mult")
-(define_function_unit "athlon_fp_mul" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "mul"))
- 1 1)
+(define_cpu_unit "athlon-load0" "athlon")
+(define_cpu_unit "athlon-load1" "athlon")
+(define_reservation "athlon-load" "athlon-agu,
+ (athlon-load0 | athlon-load1)")
+(define_reservation "athlon-store" "nothing")
-(define_function_unit "athlon_fp_add" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "add"))
- 1 1)
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fadd | athlon-fmul | athlon-fstore)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
-(define_function_unit "athlon_fp_muladd" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "muladd,mul,add"))
- 1 1)
-(define_function_unit "athlon_fp_store" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "store"))
- 1 1)
+;; Jump instructions are executed in the branch unit compltetely transparent to us
+(define_insn_reservation "athlon_branch" 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ibr"))
+ "athlon-direct")
+(define_insn_reservation "athlon_call" 0
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "call,callv"))
+ "athlon-vector")
-;; We don't need to model the Address Generation Unit, since we don't model
-;; the re-order buffer yet and thus we never schedule more than three operations
-;; at time. Later we may want to experiment with MD_SCHED macros modeling the
-;; decoders independently on the functional units.
+;; Latency of push operation is 3 cycles, but ESP value is available
+;; earlier
+(define_insn_reservation "athlon_push" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "push"))
+ "athlon-direct,nothing,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "push"))
+ "athlon-vector,athlon-ieu,athlon-load")
+(define_insn_reservation "athlon_leave" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "push"))
+ "athlon-vector,athlon-load")
-;(define_function_unit "athlon_agu" 3 0
-; (and (eq_attr "cpu" "athlon")
-; (and (eq_attr "memory" "!none")
-; (eq_attr "athlon_fpunits" "none")))
-; 1 1)
+;; Lea executes in AGU unit with 2 cycles latency.
+(define_insn_reservation "athlon_lea" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu")
-;; Model load unit to avoid too long sequences of loads. We don't need to
-;; model store queue, since it is hardly going to be bottleneck.
+;; Mul executes in special multiplier unit attached to IEU0
+(define_insn_reservation "athlon_imul" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_idiv" 42
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu*42")
+(define_insn_reservation "athlon_idiv_mem" 45
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu*42")
+(define_insn_reservation "athlon_str" 15
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "athlon-vector,athlon-load,athlon-ieu*10")
-(define_function_unit "athlon_load" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "memory" "load,both"))
- 1 1)
+(define_insn_reservation "athlon_idirect" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_loadmov" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load")
+(define_insn_reservation "athlon_idirect_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_idirect_movstore" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_idirect_both" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_both" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_idirect_store" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_store" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,athlon-ieu,athlon-ieu,
+ athlon-store")
+;; Athlon floatin point unit
+(define_insn_reservation "athlon_fldxf" 12
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fany")
+(define_insn_reservation "athlon_fld" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fany,nothing,athlon-load")
+(define_insn_reservation "athlon_fstxf" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fstore")
+(define_insn_reservation "athlon_fst" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,athlon-fstore,nothing,athlon-store")
+(define_insn_reservation "athlon_fist" 4
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fistp"))
+ "athlon-direct,athlon-fstore,nothing")
+(define_insn_reservation "athlon_fmov" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fmov"))
+ "athlon-direct,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fop"))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fmul"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fsgn"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fpspc" 100
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fpspc"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_fcmov_load" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_fcmov" 7
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_fcomi_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "type" "fcmp")))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmp"))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_fxch" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fxch"))
+ "athlon-direct,athlon-fany")
+;; Athlon handle MMX operations in the FPU unit with shorter latencies
+(define_insn_reservation "athlon_mmxsseld" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fany,athlon-load")
+(define_insn_reservation "athlon_mmxssest" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,athlon-store")
+(define_insn_reservation "athlon_mmxssemov" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "mmxmov,ssemov"))
+ "athlon-direct,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "mmxmul"))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "unit" "mmx")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "unit" "mmx"))
+ "athlon-direct,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well. The latnecy
+;; is same as for i387 operations for scalar operations
+(define_insn_reservation "athlon_sselog_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_sselog" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sselog"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_ssecmp_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 6
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fadd")
+(define_insn_reservation "athlon_ssemul_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssemul"))
+ "athlon-vector,athlon-fmul")
+(define_insn_reservation "athlon_ssediv_load" 19
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssediv" 16
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector_load" 32
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-load,athlon-fmul")
+(define_insn_reservation "athlon_ssedivvector" 29
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul")
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b02fc4a2092..9e400cdfcd5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -84,6 +84,7 @@ struct processor_costs size_cost = { /* costs for tunning for size */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
2, /* cost of FDIV instruction. */
@@ -128,6 +129,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
23, /* cost of FADD and FSUB insns. */
27, /* cost of FMUL instruction. */
88, /* cost of FDIV instruction. */
@@ -171,6 +173,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 1, /* Branch cost */
8, /* cost of FADD and FSUB insns. */
16, /* cost of FMUL instruction. */
73, /* cost of FDIV instruction. */
@@ -214,6 +217,7 @@ struct processor_costs pentium_cost = {
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
+ 2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
3, /* cost of FMUL instruction. */
39, /* cost of FDIV instruction. */
@@ -257,6 +261,7 @@ struct processor_costs pentiumpro_cost = {
3, /* MMX or SSE register to integer */
32, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
5, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
@@ -300,6 +305,7 @@ struct processor_costs k6_cost = {
6, /* MMX or SSE register to integer */
32, /* size of prefetch block */
1, /* number of parallel prefetches */
+ 1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
@@ -343,6 +349,7 @@ struct processor_costs athlon_cost = {
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
4, /* cost of FADD and FSUB insns. */
4, /* cost of FMUL instruction. */
24, /* cost of FDIV instruction. */
@@ -355,11 +362,11 @@ static const
struct processor_costs pentium4_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
- 8, /* variable shift costs */
- 8, /* constant shift costs */
- 30, /* cost of starting a multiply */
+ 4, /* variable shift costs */
+ 4, /* constant shift costs */
+ 15, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
- 112, /* cost of a divide/mod */
+ 56, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
16, /* "large" insn */
@@ -386,6 +393,7 @@ struct processor_costs pentium4_cost = {
10, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
+ 2, /* Branch cost */
5, /* cost of FADD and FSUB insns. */
7, /* cost of FMUL instruction. */
43, /* cost of FDIV instruction. */
@@ -445,6 +453,13 @@ const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
+/* Set for machines where the type and dependencies are resolved on SSE register
+ parts insetad of whole registers, so we may maintain just lower part of
+ scalar values in proper format leaving the upper part undefined. */
+const int x86_sse_partial_regs = m_ATHLON;
+const int x86_sse_typeless_stores = m_ATHLON;
+const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
/* In case the avreage insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
@@ -934,17 +949,16 @@ override_options ()
const int align_jump;
const int align_jump_max_skip;
const int align_func;
- const int branch_cost;
}
const processor_target_table[PROCESSOR_max] =
{
- {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
- {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
- {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
- {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
- {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
- {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
- {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
+ {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
+ {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
+ {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
+ {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
+ {&athlon_cost, 0, 0, 16, 7, 64, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0}
};
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
@@ -1212,7 +1226,7 @@ override_options ()
}
/* Validate -mbranch-cost= value, or provide default. */
- ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
+ ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
if (ix86_branch_cost_string)
{
i = atoi (ix86_branch_cost_string);
@@ -3404,6 +3418,34 @@ non_q_regs_operand (op, mode)
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
insns. */
int
+zero_extended_scalar_load_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ unsigned n_elts;
+ if (GET_CODE (op) != MEM)
+ return 0;
+ op = maybe_get_pool_constant (op);
+ if (!op)
+ return 0;
+ if (GET_CODE (op) != CONST_VECTOR)
+ return 0;
+ n_elts =
+ (GET_MODE_SIZE (GET_MODE (op)) /
+ GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
+ insns. */
+int
sse_comparison_operator (op, mode)
rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
@@ -9296,12 +9338,9 @@ ix86_expand_int_movcc (operands)
* This is reasonably steep, but branch mispredict costs are
* high on modern cpus, so consider failing only if optimizing
* for space.
- *
- * %%% Parameterize branch_cost on the tuning architecture, then
- * use that. The 80386 couldn't care less about mispredicts.
*/
- if (!optimize_size && !TARGET_CMOVE)
+ if (!TARGET_CMOVE && BRANCH_COST >= 2)
{
if (cf == 0)
{
@@ -9379,7 +9418,7 @@ ix86_expand_int_movcc (operands)
optab op;
rtx var, orig_out, out, tmp;
- if (optimize_size)
+ if (BRANCH_COST >= 2)
return 0; /* FAIL */
/* If one of the two operands is an interesting constant, load a
@@ -9514,8 +9553,14 @@ ix86_expand_fp_movcc (operands)
if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
{
/* Check for min operation. */
- if (code == LT)
+ if (code == LT || code == UNLE)
{
+ if (code == UNLE)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
if (memory_operand (op0, VOIDmode))
op0 = force_reg (GET_MODE (operands[0]), op0);
@@ -9526,8 +9571,14 @@ ix86_expand_fp_movcc (operands)
return 1;
}
/* Check for max operation. */
- if (code == GT)
+ if (code == GT || code == UNGE)
{
+ if (code == UNGE)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
if (memory_operand (op0, VOIDmode))
op0 = force_reg (GET_MODE (operands[0]), op0);
@@ -11305,13 +11356,6 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
memory = get_attr_memory (insn);
dep_memory = get_attr_memory (dep_insn);
- if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
- {
- if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
- cost += 2;
- else
- cost += 3;
- }
/* Show ability of reorder buffer to hide latency of load by executing
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
@@ -11585,7 +11629,7 @@ ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
static int
ia32_use_dfa_pipeline_interface ()
{
- if (ix86_cpu == PROCESSOR_PENTIUM)
+ if (ix86_cpu == PROCESSOR_PENTIUM || ix86_cpu == PROCESSOR_ATHLON)
return 1;
return 0;
}
@@ -12795,7 +12839,8 @@ safe_vector_operand (x, mode)
: gen_rtx_SUBREG (DImode, x, 0)));
else
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
- : gen_rtx_SUBREG (V4SFmode, x, 0)));
+ : gen_rtx_SUBREG (V4SFmode, x, 0),
+ CONST0_RTX (V4SFmode)));
return x;
}
@@ -13465,7 +13510,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_SSE_ZERO:
target = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_clrv4sf (target));
+ emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
return target;
case IX86_BUILTIN_MMX_ZERO:
@@ -14319,21 +14364,27 @@ x86_machine_dependent_reorg (first)
if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
continue;
- prev = prev_nonnote_insn (ret);
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
+ break;
if (prev && GET_CODE (prev) == CODE_LABEL)
{
edge e;
for (e = bb->pred; e; e = e->pred_next)
- if (EDGE_FREQUENCY (e) && e->src->index > 0
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
&& !(e->flags & EDGE_FALLTHRU))
insert = 1;
}
if (!insert)
{
- prev = prev_real_insn (ret);
+ prev = prev_active_insn (ret);
if (prev && GET_CODE (prev) == JUMP_INSN
&& any_condjump_p (prev))
insert = 1;
+ /* Empty functions get branch misspredict even when the jump destination
+ is not visible to us. */
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ insert = 1;
}
if (insert)
emit_insn_before (gen_nop (), ret);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8e331765847..3971ef5cdc4 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -75,6 +75,7 @@ struct processor_costs {
const int prefetch_block; /* bytes moved to cache for prefetch. */
const int simultaneous_prefetches; /* number of parallel prefetch
operations. */
+ const int branch_cost; /* Default value for BRANCH_COST. */
const int fadd; /* cost of FADD and FSUB instructions. */
const int fmul; /* cost of FMUL instruction. */
const int fdiv; /* cost of FDIV instruction. */
@@ -221,6 +222,8 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
+extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs;
+extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern int x86_prefetch_sse;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
@@ -257,6 +260,12 @@ extern int x86_prefetch_sse;
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ (x86_sse_partial_reg_dependency & CPUMASK)
+#define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK)
+#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK)
+#define TARGET_SSE_TYPELESS_LOAD0 (x86_sse_typeless_load0 & CPUMASK)
+#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & CPUMASK)
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK)
@@ -3286,6 +3295,7 @@ do { \
{"register_and_not_any_fp_reg_operand", {REG}}, \
{"fp_register_operand", {REG}}, \
{"register_and_not_fp_reg_operand", {REG}}, \
+ {"zero_extended_scalar_load_operand", {MEM}}, \
/* A list of predicates that do special things with modes, and so
should not elicit warnings for VOIDmode match_operand. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4596cee7f9e..4ee6b71d382 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2133,12 +2133,12 @@
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
- if (TARGET_SSE2 && !TARGET_ATHLON)
+ if (get_attr_mode (insn) == MODE_TI)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
+ if (get_attr_mode (insn) == MODE_V4SF)
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
@@ -2158,7 +2158,40 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
- (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APS move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ Do the same for architectures resolving dependencies on
+ the parts. While in DF mode it is better to always handle
+ just register parts, the SF mode is different due to lack
+ of instructions to load just part of the register. It is
+ better to maintain the whole registers in single format
+ to avoid problems on using packed logical operations. */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
(define_insn "*swapsf"
[(set (match_operand:SF 0 "register_operand" "+f")
@@ -2319,25 +2352,78 @@
case 4:
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
- return "movsd\t{%1, %0|%0, %1}";
+ return "movsd\t{%1, %0|%0, %1}";
default:
abort();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
@@ -2381,16 +2467,34 @@
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
return "movsd\t{%1, %0|%0, %1}";
@@ -2399,7 +2503,42 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_split
[(set (match_operand:DF 0 "nonimmediate_operand" "")
@@ -3706,7 +3845,7 @@
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
- "TARGET_80387 && TARGET_SSE2"
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
{
switch (which_alternative)
{
@@ -3716,7 +3855,30 @@
else
return "fst%z0\t%y0";
case 4:
- return "cvtsd2ss\t{%1, %0|%0, %1}";
+ return "#";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
+ (set_attr "mode" "SF,SF,SF,SF,DF")])
+
+(define_insn "*truncdfsf2_1_sse_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 4:
+ return "#";
default:
abort ();
}
@@ -3728,7 +3890,7 @@
[(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
- "TARGET_80387 && TARGET_SSE2
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
switch (which_alternative)
@@ -3747,7 +3909,30 @@
[(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
-(define_insn "truncdfsf2_3"
+(define_insn "*truncdfsf2_2_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "#";
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "ssecvt,fmov")
+ (set_attr "mode" "DF,SF")])
+
+(define_insn "*truncdfsf2_3"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "f")))]
@@ -3765,11 +3950,20 @@
[(set (match_operand:SF 0 "register_operand" "=Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
- "!TARGET_80387 && TARGET_SSE2"
+ "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+(define_insn "*truncdfsf2_sse_only_nooverlap"
+ [(set (match_operand:SF 0 "register_operand" "=&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+ "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+ "#"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
@@ -3779,15 +3973,56 @@
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
(define_split
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ [(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])
+ && !STACK_REG_P (operands[1])"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ if (!TARGET_SSE_PARTIAL_REGS)
+ emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
+ else
+ {
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
"TARGET_80387 && reload_completed
- && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
- [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
- "")
+ && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ DONE;
+})
(define_split
[(set (match_operand:SF 0 "register_operand" "")
@@ -4491,6 +4726,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+ DONE;
+})
+
(define_expand "floatdisf2"
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
@@ -4529,6 +4780,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+ DONE;
+})
+
(define_insn "floathidf2"
[(set (match_operand:DF 0 "register_operand" "=f,f")
(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
@@ -9266,12 +9533,15 @@
in register. */
rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
+ rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9289,8 +9559,8 @@
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
- (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x"))
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
@@ -9311,7 +9581,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9320,8 +9590,8 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
- (neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
@@ -9400,7 +9670,7 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg;
#if HOST_BITS_PER_WIDE_INT >= 64
rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
@@ -9410,7 +9680,10 @@
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9428,8 +9701,8 @@
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y"))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9439,8 +9712,8 @@
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0")))
- (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r"))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9451,7 +9724,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(neg:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9461,7 +9734,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])
&& (!TARGET_64BIT || FP_REG_P (operands[0]))"
@@ -9472,7 +9745,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9484,14 +9757,18 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
- (neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(xor:TI (subreg:TI (match_dup 1) 0)
(subreg:TI (match_dup 2) 0)))]
{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
@@ -9724,14 +10001,18 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (SFmode);
+ rtx reg = gen_reg_rtx (V4SFmode);
rtx dest = operands[0];
+ rtx imm;
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9748,9 +10029,9 @@
"#")
(define_insn "abssf2_ifs"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
- (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
@@ -9761,7 +10042,7 @@
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(abs:SF (match_operand:SF 1 "memory_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9771,7 +10052,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9780,13 +10061,22 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
- (abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (subreg:TI (match_dup 1) 0)
+ (subreg:TI (match_dup 2) 0)))]
+{
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
@@ -9849,17 +10139,22 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg = gen_reg_rtx (V2DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
- rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
+ rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
#else
- rtx imm = immed_double_const (0, 0x80000000, DImode);
+ rtx imm = immed_double_const (~0, ~0x80000000, DImode);
#endif
rtx dest = operands[0];
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+
+ /* Produce LONG_DOUBLE with the proper immediate argument. */
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9876,9 +10171,9 @@
"#")
(define_insn "absdf2_ifs"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9887,9 +10182,9 @@
"#")
(define_insn "*absdf2_ifs_rex64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9900,7 +10195,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(abs:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9910,7 +10205,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9919,13 +10214,26 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
- (abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (subreg:TI (match_dup 1) 0)
+ (subreg:TI (match_dup 2) 0)))]
+{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
@@ -16547,6 +16855,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16574,6 +16888,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE2
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16635,6 +16955,14 @@
(set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
(subreg:TI (match_dup 7) 0)))]
{
+ if (GET_MODE (operands[2]) == DFmode
+ && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
/* If op2 == op3, op3 will be clobbered before it is used.
This should be optimized out though. */
if (operands_match_p (operands[2], operands[3]))
@@ -16743,6 +17071,20 @@
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
(subreg:TI (match_dup 7) 0)))]
{
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+ && GET_MODE (operands[2]) == DFmode)
+ {
+ if (REG_P (operands[2]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ if (REG_P (operands[3]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ }
PUT_MODE (operands[1], GET_MODE (operands[0]));
if (!sse_comparison_operator (operands[1], VOIDmode))
{
@@ -17849,28 +18191,93 @@
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv2di_internal"
[(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movdqa\t{%1, %0|%0, %1}"
+ "TARGET_SSE2"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (V2DFmode);
+})
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
@@ -17920,28 +18327,85 @@
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movapd\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "V2DF"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "V2DF"))]
+ (const_string "V2DF")))])
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_expand "movv2df"
[(set (match_operand:V2DF 0 "general_operand" "")
@@ -18158,26 +18622,83 @@
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "C,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
- "@
- xorps\t%0, %0
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "2")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "*movti_rex64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
- "@
- #
- #
- xorps\t%0, %0
- movaps\\t{%1, %0|%0, %1}
- movaps\\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 3:
+ case 4:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "*,*,ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -18327,11 +18848,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
@@ -18854,12 +19385,26 @@
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
+ (match_operand:V4SF 1 "const0_operand" "X"))]
"TARGET_SSE"
- "xorps\t{%0, %0|%0, %0}"
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t{%0, %0|%0, %0}";
+ else
+ return "xorps\t{%0, %0|%0, %0}";
+}
[(set_attr "type" "sselog")
(set_attr "memory" "none")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF")))])
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
@@ -19091,6 +19636,18 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -20718,7 +21275,7 @@
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "register_operand" "xm")))
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
(const_int 14)))]
"TARGET_SSE2"
"cvtsd2ss\t{%2, %0|%0, %2}"
@@ -20730,7 +21287,7 @@
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "register_operand" "xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)])))
(const_int 2)))]
@@ -21006,10 +21563,20 @@
(define_insn "sse2_clrti"
[(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
"TARGET_SSE2"
- "pxor\t{%0, %0|%0, %0}"
- [(set_attr "type" "sseiadd")
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t%0, %0";
+ else
+ return "xorps\t%0, %0";
+}
+ [(set_attr "type" "ssemov")
(set_attr "memory" "none")
- (set_attr "mode" "TI")])
+ (set (attr "mode")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI")))])
;; MMX unsigned averages/sum of absolute differences
@@ -21714,11 +22281,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (match_operand:DF 1 "memory_operand" "m")
- (vec_duplicate:DF (float:DF (const_int 0)))
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE2"
"movsd\t{%1, %0|%0, %1}"
diff --git a/gcc/config/i386/scodbx.h b/gcc/config/i386/scodbx.h
deleted file mode 100644
index 7da93053256..00000000000
--- a/gcc/config/i386/scodbx.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Definitions for Intel 386 running SCO Unix System V,
- using dbx-in-coff encapsulation.
- Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#include "i386/svr3dbx.h"
-
-/* Overridden defines for SCO systems from sco.h. */
-
-/* By default, target has a 80387, uses IEEE compatible arithmetic,
- and returns float values in the 387, ie,
- (TARGET_80387 | TARGET_FLOAT_RETURNS_IN_80387)
-
- SCO's software emulation of a 387 fails to handle the `fucomp'
- opcode. fucomp is only used when generating IEEE compliant code.
- So don't make TARGET_IEEE_FP default for SCO. */
-
-#undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_FLOAT_RETURNS)
-
-/* Use crt1.o as a startup file and crtn.o as a closing file. */
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
- "%{!r:%{!z:svr3.ifile%s}%{z:svr3z.ifile%s}}\
- %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}"
-
-/* Library spec, including SCO international language support. */
-
-#undef LIB_SPEC
-#define LIB_SPEC \
- "%{p:-L/usr/lib/libp}%{pg:-L/usr/lib/libp} %{scointl:libintl.a%s} -lc"
-
-/* Specify predefined symbols in preprocessor. */
-
-#undef CPP_PREDEFINES
-#define CPP_PREDEFINES "-Dunix -DM_UNIX -DM_I386 -DM_COFF -DM_WORDSWAP -Asystem=svr3"
-
-#undef CPP_SPEC
-#define CPP_SPEC "%(cpp_cpu) %{scointl:-DM_INTERNAT}"
-
-/* This spec is used for telling cpp whether char is signed or not. */
-
-#undef SIGNED_CHAR_SPEC
-#if DEFAULT_SIGNED_CHAR
-#define SIGNED_CHAR_SPEC \
- "%{funsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#else
-#define SIGNED_CHAR_SPEC \
- "%{!fsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#endif
-
-/* caller has to pop the extra argument passed to functions that return
- structures. */
-
-#undef RETURN_POPS_ARGS
-#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) \
- ((FUNDECL) && TREE_CODE (FUNDECL) == IDENTIFIER_NODE ? 0 \
- : (TARGET_RTD \
- && (TYPE_ARG_TYPES (FUNTYPE) == 0 \
- || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE))) \
- == void_type_node))) ? (SIZE) \
- : 0)
-/* On other 386 systems, the last line looks like this:
- : (aggregate_value_p (TREE_TYPE (FUNTYPE))) ? GET_MODE_SIZE (Pmode) : 0) */
-
-/* Handle #pragma pack. */
-#define HANDLE_SYSV_PRAGMA
diff --git a/gcc/config/i386/t-darwin b/gcc/config/i386/t-darwin
new file mode 100644
index 00000000000..51285570d68
--- /dev/null
+++ b/gcc/config/i386/t-darwin
@@ -0,0 +1,11 @@
+darwin.o: $(srcdir)/config/darwin.c $(CONFIG_H) $(SYSTEM_H) $(RTL_BASE_H) \
+ $(REGS_H) hard-reg-set.h insn-config.h conditions.h output.h \
+ insn-attr.h flags.h $(TREE_H) $(EXPR_H) reload.h \
+ function.h $(GGC_H) $(TM_P_H) gt-darwin.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+darwin-c.o: $(srcdir)/config/darwin-c.c $(CONFIG_H) $(SYSTEM_H) \
+ $(TREE_H) $(C_TREE_H) c-pragma.h toplev.h cpplib.h $(TM_P_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+gt-darwin.h : s-gtype ; @true
diff --git a/gcc/config/i386/xm-dgux.h b/gcc/config/i386/xm-dgux.h
deleted file mode 100644
index 881c5c7be9d..00000000000
--- a/gcc/config/i386/xm-dgux.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* Configuration for GCC for Intel i386 running DG/ux */
-
-/* looks just like sysv4 for now */
-#include "xm-svr4.h"
diff --git a/gcc/config/i386/xm-sun.h b/gcc/config/i386/xm-sun.h
deleted file mode 100644
index 6c0f0a25630..00000000000
--- a/gcc/config/i386/xm-sun.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Configuration for GNU C-compiler for Intel 80386 running SunOS 4.0.
- Copyright (C) 1988, 1997 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#define USG
diff --git a/gcc/config/i386/xm-sysv3.h b/gcc/config/i386/xm-sysv3.h
deleted file mode 100644
index 9a655443ff5..00000000000
--- a/gcc/config/i386/xm-sysv3.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* Configuration for GCC for Intel i386 running System V Release 3. */
-
-#include "xm-svr3.h"
diff --git a/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C b/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C
deleted file mode 100644
index c27aa8d2df7..00000000000
--- a/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C
+++ /dev/null
@@ -1,19 +0,0 @@
-//Build don't link:
-#include <vector>
-#include <algorithm>
-
-template <class T> class Expr
-{
-public :
-Expr(){};
-Expr(const T&){};
-};
-
-template <class T >
-inline bool compare(const Expr<T> a, const Expr<T> b){ return true; };
-
-int main()
-{
- std::vector<int> a(3);
- std::sort( a.begin(), a.end(), compare ); // ERROR - no matching function
-}