From de824608e40e2fe2b72a0709bdaedecfab8f093e Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Wed, 13 Nov 2002 11:19:59 +0000 Subject: This commit was manufactured by cvs2svn to create tag 'hammer-3_3-merge-20021211'. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/tags/hammer-3_3-merge-20021211@59074 138bc75d-0d04-0410-961f-82ee72b054a4 --- config/mpw-mh-mpw | 157 +++++ config/mpw/ChangeLog | 53 ++ config/mpw/MoveIfChange | 19 + config/mpw/README | 23 + config/mpw/forward-include | 3 + config/mpw/g-mpw-make.sed | 293 +++++++++ config/mpw/mpw-touch | 7 + config/mpw/mpw-true | 1 + config/mpw/null-command | 1 + config/mpw/open-brace | 4 + config/mpw/tr-7to8-src | 9 + config/mpw/true | 1 + gcc/ChangeLog.hammer | 109 ++++ gcc/config/i386/athlon.md | 573 ++++++++++++------ gcc/config/i386/i386.c | 117 ++-- gcc/config/i386/i386.h | 10 + gcc/config/i386/i386.md | 821 ++++++++++++++++++++++---- gcc/config/i386/scodbx.h | 84 --- gcc/config/i386/t-darwin | 11 + gcc/config/i386/xm-dgux.h | 4 - gcc/config/i386/xm-sun.h | 21 - gcc/config/i386/xm-sysv3.h | 3 - gcc/testsuite/g++.old-deja/g++.robertl/eb42.C | 19 - 23 files changed, 1885 insertions(+), 458 deletions(-) create mode 100644 config/mpw-mh-mpw create mode 100644 config/mpw/ChangeLog create mode 100644 config/mpw/MoveIfChange create mode 100644 config/mpw/README create mode 100644 config/mpw/forward-include create mode 100644 config/mpw/g-mpw-make.sed create mode 100644 config/mpw/mpw-touch create mode 100644 config/mpw/mpw-true create mode 100644 config/mpw/null-command create mode 100644 config/mpw/open-brace create mode 100644 config/mpw/tr-7to8-src create mode 100644 config/mpw/true create mode 100644 gcc/ChangeLog.hammer delete mode 100644 gcc/config/i386/scodbx.h create mode 100644 gcc/config/i386/t-darwin delete mode 100644 gcc/config/i386/xm-dgux.h delete mode 100644 gcc/config/i386/xm-sun.h delete mode 100644 gcc/config/i386/xm-sysv3.h delete mode 100644 
gcc/testsuite/g++.old-deja/g++.robertl/eb42.C diff --git a/config/mpw-mh-mpw b/config/mpw-mh-mpw new file mode 100644 index 00000000000..543ef4fb2a1 --- /dev/null +++ b/config/mpw-mh-mpw @@ -0,0 +1,157 @@ +# This is an MPW makefile fragment. + +# Since there are a multiplicity of Mac compilers and two different +# processors, this file is primarily a library of options for each +# compiler. Somebody else (such as a configure or build script) will +# make the actual choice. + +# Compiler to use for compiling. + +CC_MPW_C = C -d MPW_C -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -mc68020 -model far -b -w + +CC_SC = SC -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -mc68020 -model far -b -i '' -i : + +CC_MWC68K = MWC68K -d MPW -enum int -mpw_chars -sym on -w off -mc68020 -model far + +CC_PPCC = PPCC -d powerc=1 -d pascal= -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -w + +CC_MRC = MrC -d powerc=1 -d pascal= -d ALMOST_STDC -d ANSI_PROTOTYPES -d MPW -i '' -i : -jm + +CC_SMrC = SMrC -d MPW + +# "-mpw_chars" is necessary because GNU sources often mix signed and +# unsigned casually. +# "-w off" is not a great idea, but CW7 is complaining about enum +# assignments. +# "-opt global,peep,l4,speed" is sometimes good, and sometimes bad. +# We must use {CIncludes} so that MPW tools will work; {MWCIncludes} +# defines stdout, islower, etc, in ways that are incompatible with MPW's +# runtime. However, this cannot be done via -i "{CIncludes}", since +# that does not affect how <>-type includes happen; instead, the variable +# MWCIncludes must be set to point at {CIncludes}. + +CC_MWCPPC = MWCPPC -d MPW -enum int -mpw_chars -sym on -w off + +# Note that GCC does *not* wire in a definition of "pascal", so that +# it can be handled in another way if desired. + +CC_68K_GCC = gC -Dpascal= -DANSI_PROTOTYPES -DMPW + +CC_PPC_GCC = gC -Dpowerc=1 -Dpascal= -DANSI_PROTOTYPES -DMPW + +# Nothing for the default CFLAGS. + +CFLAGS = + +# Tool to use for making libraries/archives. 
+ +AR_LIB = Lib + +AR_MWLINK68K = MWLink68K -xm library + +AR_PPCLINK = PPCLink -xm library + +AR_MWLINKPPC = MWLinkPPC -xm library + +AR_AR = ar + +AR_FLAGS = -o + +RANLIB_NULL = null-command + +RANLIB_RANLIB = ranlib + +# Compiler and/or linker to use for linking. + +CC_LD_LINK = Link -w -d -model far {CC_LD_TOOL_FLAGS} + +CC_LD_MWLINK68K = MWLink68K -w {CC_LD_TOOL_FLAGS} -sym on -model far + +CC_LD_PPCLINK = PPCLink -main __start -outputformat xcoff + +CC_LD_MWLINKPPC = MWLinkPPC -w {CC_LD_TOOL_FLAGS} -sym on + +CC_LD_GLD = gC + +# Extension for linker output. + +PROG_EXT_68K = + +PROG_EXT_XCOFF = .xcoff + +# Nothing for the default LDFLAGS. + +LDFLAGS = -w + +CC_LD_TOOL_FLAGS = -c 'MPS ' -t MPST + +# Libraries to link against. + +# It would appear that the math libraries are not +# needed except to provide a definition for scalb, +# which is called from ldexp, which is referenced +# in the m68k opcodes library. + +EXTRALIBS_C = \Option-d + "{CLibraries}"StdClib.o \Option-d + "{CLibraries}"Math.o \Option-d + "{CLibraries}"CSANELib.o \Option-d + "{Libraries}"Stubs.o \Option-d + "{Libraries}"Runtime.o \Option-d + "{Libraries}"Interface.o \Option-d + "{Libraries}"ToolLibs.o + +EXTRALIBS_MWC68K = \Option-d + "{CLibraries}"StdClib.o \Option-d + "{CLibraries}"Math.o \Option-d + "{CLibraries}"CSANELib.o \Option-d + "{Libraries}"Stubs.o \Option-d + "{Libraries}"Runtime.o \Option-d + "{Libraries}"Interface.o \Option-d + "{Libraries}"ToolLibs.o \Option-d + "{MW68KLibraries}MPW ANSI (4i) C.68K.Lib" + +EXTRALIBS_PPC_XCOFF = \Option-d + "{PPCLibraries}"StdCRuntime.o \Option-d + "{PPCLibraries}"InterfaceLib.xcoff \Option-d + "{PPCLibraries}"MathLib.xcoff \Option-d + "{PPCLibraries}"StdCLib.xcoff \Option-d + "{PPCLibraries}"PPCToolLibs.o \Option-d + "{PPCLibraries}"PPCCRuntime.o \Option-d + "{GCCPPCLibraries}"libgcc.xcoff + +EXTRALIBS_PPC = \Option-d + "{PPCLibraries}"StdCRuntime.o \Option-d + "{SharedLibraries}"InterfaceLib \Option-d + "{SharedLibraries}"MathLib \Option-d + 
"{SharedLibraries}"StdCLib \Option-d + "{PPCLibraries}"PPCToolLibs.o \Option-d + "{PPCLibraries}"PPCCRuntime.o \Option-d + "{GCCPPCLibraries}"libgcc.xcoff + +EXTRALIBS_MWCPPC = \Option-d + "{MWPPCLibraries}"MWStdCRuntime.Lib \Option-d + "{MWPPCLibraries}"InterfaceLib \Option-d + "{MWPPCLibraries}"StdCLib \Option-d + "{MWPPCLibraries}"MathLib \Option-d + "{MWPPCLibraries}"PPCToolLibs.o + +# Tool to make PEF with, if needed. + +MAKEPEF_NULL = null-command + +MAKEPEF_PPC = MakePEF + +MAKEPEF_FLAGS = \Option-d + -l InterfaceLib.xcoff=InterfaceLib \Option-d + -l MathLib.xcoff=MathLib \Option-d + -l StdCLib.xcoff=StdCLib + +MAKEPEF_TOOL_FLAGS = -ft MPST -fc 'MPS ' + +# Resource compiler to use. + +REZ_68K = Rez + +REZ_PPC = Rez -d WANT_CFRG + diff --git a/config/mpw/ChangeLog b/config/mpw/ChangeLog new file mode 100644 index 00000000000..3cdefbf7a75 --- /dev/null +++ b/config/mpw/ChangeLog @@ -0,0 +1,53 @@ +Tue Nov 26 12:34:12 1996 Stan Shebs + + * g-mpw-make.sed: Fix some comments. + +Mon Sep 16 14:42:52 1996 Stan Shebs + + * g-mpw-make.sed (HLDENV): Edit out all references. + +Thu Aug 15 19:49:23 1996 Stan Shebs + + * true: New script, identical to mpw-true. + * g-mpw-make.sed: Add @DASH_C_FLAG@ and @SEGMENT_FLAG()@ + to the editors for compile commands. + +Thu Aug 1 15:01:42 1996 Stan Shebs + + * mpw-true, mpw-touch, null-command: New scripts. + * README: Describe usage in more detail. + +Tue Dec 12 14:51:51 1995 Stan Shebs + + * g-mpw-make.sed: Don't edit out "version=" occurrences. + +Fri Dec 1 11:46:18 1995 Stan Shebs + + * g-mpw-make.sed (bindir, libdir): Edit the positions of + pathname separators to work with other pathnames better. + +Tue Nov 7 15:08:07 1995 Stan Shebs + + * g-mpw-make.sed: Add comment about Duplicate vs Catenate, + add additional pattern for editing link-compile commands. + +Tue Oct 24 14:28:51 1995 Stan Shebs + + * g-mpw-make.sed: Add handling for *.tab.[hc] files. + (CHILL_FOR_TARGET, CHILL_LIB): Edit out tricky definitions + of these. 
+ +Thu Sep 28 21:05:10 1995 Stan Shebs + + * g-mpw-make.sed: New file, generic sed commands to translate + Unix makefiles into MPW makefile syntax. + +Fri Mar 17 11:51:20 1995 Stan Shebs + + * README: Clarify instructions. + * fi: Remove. + +Wed Dec 21 15:45:53 1994 Stan Shebs + + * MoveIfChange, README, fi, forward-include, open-brace, + tr-7to8-src: New files. diff --git a/config/mpw/MoveIfChange b/config/mpw/MoveIfChange new file mode 100644 index 00000000000..0dbc12582f5 --- /dev/null +++ b/config/mpw/MoveIfChange @@ -0,0 +1,19 @@ +# Rename a file only if it is different from a previously existing +# file of the same name. This is useful for keeping make from doing +# too much work if the contents of a file haven't changed. + +# This is an MPW translation of the standard GNU sh script move-if-change. + +Set exit 0 + +If "`exists -f "{2}"`" + Compare "{1}" "{2}" >dev:null + If {status} != 0 + Rename -y "{1}" "{2}" + Else + Echo "{2}" is unchanged + Delete -i -y "{1}" + End +Else + Rename -y "{1}" "{2}" +End diff --git a/config/mpw/README b/config/mpw/README new file mode 100644 index 00000000000..554700adc81 --- /dev/null +++ b/config/mpw/README @@ -0,0 +1,23 @@ +This directory contains MPW scripts and related files that are needed to +build Cygnus GNU tools for MPW. The scripts should be somewhere on the +command path; our usual practice has been to have a separate directory +for the scripts, and put the tools (byacc, flex, and sed at least) there +also; then it's easier to drag the support bits around as a group, or to +upgrade MPW versions. The complete package of scripts and tool binaries +is usually available as pub/mac/buildtools.cpt.hqx on ftp.cygnus.com. + +"tr-7to8-src" is actually the source to an MPW script that transforms +sequences like "\Option-d" into the actual 8-bit chars that MPW needs. +It's only the source because it can't itself include any 8-bit chars. 
+It *can* be processed into a genuine "tr-7to8" by using itself: + + tr-7to8 tr-7to8-src | sed -e 's/Src//' >new-tr-7to8 + +Use this to verify: + + compare tr-7to8 new-tr-7to8 + +If you don't have a working tr-7to8, then you will have to manually +replace all occurrences of "\Option-d" with real Option-d (which looks +like a delta), then do similarly with all the other "\Option-..." +strings, and then change "\SrcOption-d" into the string "\Option-d". diff --git a/config/mpw/forward-include b/config/mpw/forward-include new file mode 100644 index 00000000000..ddd6bd71105 --- /dev/null +++ b/config/mpw/forward-include @@ -0,0 +1,3 @@ +Echo '#include' ¶""{1}"¶" >"{2}".tem +MoveIfChange "{2}".tem "{2}" + diff --git a/config/mpw/g-mpw-make.sed b/config/mpw/g-mpw-make.sed new file mode 100644 index 00000000000..e7d3c770736 --- /dev/null +++ b/config/mpw/g-mpw-make.sed @@ -0,0 +1,293 @@ +# Sed commands to translate Unix makefiles into MPW makefiles. +# These are nominally generic, but work best on the makefiles used +# for GNU programs. + +# Whack out any commented-out lines that are probably commands; +# they can only cause trouble later on. +/^# /d + +# Change dependency char. +/:$/s/:/ \\Option-f/g +/^[^ :#][^:]*:/s/\([ ]*\):\([ ]*\)/ \\Option-f /g + +# Change syntax of Makefile vars. +/\$/s/\${\([a-zA-Z0-9_-]*\)}/{\1}/g +/\$/s/\$(\([a-zA-Z0-9_-]*\))/{\1}/g +/ $@/s/ $@/ {Targ}/ + +# Double-$ are literals to Unix but not to MPW make. +/\$\$/s/\$\$/$/g + +# Change pathname syntax. +/\//s,\.\./\/\.\./,:::,g +/\//s,\.\./,::,g +/\.\//s,\./,:,g +/\//s,/,:,g +# Undo excess changes. +/and/s,and:or$,and/or, +/and/s,and:or ,and/or , +/want/s,want:need,want/need, +# Fixing up sed commands. +/-e/s_":\([^:]*\):d"_"/\1/d"_g +/-e/s_":\([^:]*\):,:\([^:]*\):d"_"/\1/,/\2/d"_g + +/=/s/ = \.$/ = :/ + +# Make these go away so that later edits not confused. +/HLDENV/s/{HLDENV}// + +# Comment out any explicit srcdir setting. 
+/srcdir/s/^srcdir/# srcdir/ + +/BASEDIR/s/^BASEDIR =.*$/BASEDIR = "{srcroot}"/ +/{BASEDIR}:/s/{BASEDIR}:/{BASEDIR}/g +/{srcdir}:/s/{srcdir}:/"{srcdir}"/g +/"{srcdir}":/s/"{srcdir}":/"{srcdir}"/g + +# Tweak some conventions that are backwards for the Mac. +/bindir/s/{exec_prefix}:bin/{exec_prefix}bin:/ +/libdir/s/{exec_prefix}:lib/{exec_prefix}lib:/ + +# Comment out settings of anything set by mpw host config. +/CC/s/^CC *=/#CC =/ +/CFLAGS/s/^CFLAGS *=/#CFLAGS =/ +/AR/s/^AR *=/#AR =/ +/AR_FLAGS/s/^AR_FLAGS *=/#AR_FLAGS =/ +/RANLIB/s/^RANLIB *=/#RANLIB =/ +/CC_LD/s/^CC_LD *=/#CC_LD =/ +/LDFLAGS/s/^LDFLAGS *=/#LDFLAGS =/ + +# Change -I usages. +/-I/s/-I\./-i :/g +/-I/s/-I::bfd/-i ::bfd:/g +/-I/s/-I::include/-i ::include:/g +/-I/s/-I/-i /g + +# Change -D usage. +/-D/s/\([ =]\)-D\([^ ]*\)/\1-d \2/g + +# Change continuation char. +/\\$/s/\\$/\\Option-d/ + +# Change wildcard char. +/\*/s/\*/\\Option-x/g + +# Change path of various types of source files. This rule does not allow +# for file names with multiple dots in the name. +/\.[chly]/s/\([ ><=]\)\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/\1"{s}"\2.\3/g +/\.[chly]/s/^\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/"{s}"\1.\2/ +# Allow files named *.tab.[ch] as a special case. +/\.tab\.[ch]/s/\([ ><=]\)\([-a-zA-Z0-9_${}:"]*\.tab\)\.\([ch]\)/\1"{s}"\2.\3/g +/\.tab\.[ch]/s/^\([-a-zA-Z0-9_${}:"]*\.tab\)\.\([ch]\)/"{s}"\1.\2/ +# Fix some overenthusiasms. +/{s}/s/"{s}""{srcdir}"/"{srcdir}"/g +/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)dir}/"{\1dir}"/g +/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)DIR}/"{\1DIR}"/g +/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)dir}"/"{\1dir}"/g +/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)DIR}"/"{\1DIR}"/g +/{s}/s/"{s}":/:/g +/{s}/s/^"{s}"//g +/{s}/s/"{s}""{s}"/"{s}"/g +/{s}/s/"{s}""{srcdir}"/"{s}"/g +/{s}/s/"{srcdir}""{s}"/"{s}"/g + +# The .def files are also typically source files. +/\.def/s/\([ ><]\)\([-a-zA-Z0-9_${}:"]*\)\.def/\1"{s}"\2.def/g +/\.def/s/^\([-a-zA-Z0-9_${}:"]*\)\.def/"{s}"\1.def/g + +# Change extension and path of objects. 
+/\.o/s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1"{o}"\2.c.o/g +/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/"{o}"\1.c.o/ +# Allow *.tab.o files as a special case of a 2-dot-name file. +/\.o/s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.tab\.o/\1"{o}"\2.tab.c.o/g +/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.tab\.o/"{o}"\1.tab.c.o/ +# Clean up. +/"{o}"/s/"{o}""{o}"/"{o}"/g +/"{o}"/s/^"{o}"\([a-zA-Z0-9_]*\)=/\1=/ + +# Change extension of libs. +/\.a/s/lib\([a-z]*\)\.a/lib\1.o/g + +# Remove non-fail option. +/-/s/^\([ ]*\)-/\1/ +# Fix overeagernesses - assumes no one-letter commands. +/^[ ]*[a-z] /s/^\([ ]*\)\([a-z]\) /\1-\2 / + +# Remove non-echo option. (watch out for autoconf things) +/@/s/^\([ ]*\)@/\1/ + +# Change cp to Duplicate. +# Catenate is perhaps more accurate, but the pattern would have to +# identify the output file and add a '>' redirection into it. +/cp/s/^\([ ]*\)cp /\1Duplicate -d -y / +# Change mv to Rename. +/mv/s/^\([ ]*\)mv /\1Rename -y / +/Rename/s/^\([ ]*\)Rename -y -f/\1Rename -y/ +# Change rm to Delete. +/rm -rf/s/^\([ ]*\)rm -rf /\1Delete -i -y / +/rm -f/s/^\([ ]*\)rm -f /\1Delete -i -y / +/rm/s/^\([ ]*\)rm /\1Delete -i -y / +# Note that we don't mess with ln - directory-specific scripts +# must decide what to do with symlinks. +# Change cat to Catenate. +/cat/s/^\([ ]*\)cat /\1Catenate / +# Change touch to mpw-touch. +/touch/s/^\([ ]*\)touch /\1mpw-touch / +# Change mkdir to NewFolder. +/mkdir/s/^\([ ]*\)mkdir /\1NewFolder / +# Change var setting to Set. +/=/s/^\([ ]*\)\([-a-zA-Z0-9_]*\)=\([^;]*\); \\Option-d/\1Set \2 \3/ + +# Change tests. +/if /s/if \[ *-f \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" != ""/ +/if /s/if \[ *-f \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" != ""/ +/if /s/if \[ ! *-f \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" == ""/ +/if /s/if \[ ! 
*-f \([^ ]*\) ] *; *then \\Option-d/If "`Exists "\1"`" == ""/ + +/if /s/if \[ *-d \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" != ""/ +/if /s/if \[ *-d \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" != ""/ +/if /s/if \[ ! *-d \([^ ]*\) ] *; *\\Option-d/If "`Exists "\1"`" == ""/ +/if /s/if \[ ! *-d \([^ ]*\) ] *; *then *\\Option-d/If "`Exists "\1"`" == ""/ + +/if /s/if \[ -d \([^ ]*\) ] *; then true *; else mkdir \([^ ;]*\) *; fi/If "`Exists "\1"`" != "" NewFolder \2 End If/ + +/if /s/if \[ \([^ ]*\) = \([^ ]*\) ] *; *\\Option-d/If "\1" == "\2"/ +/if /s/if \[ \([^ ]*\) = \([^ ]*\) ] *; *then *\\Option-d/If "\1" == "\2"/ + +/if /s/if \[ \([^ ]*\) != \([^ ]*\) ] *; *\\Option-d/If "\1" != "\2"/ +/if /s/if \[ \([^ ]*\) != \([^ ]*\) ] *; *then *\\Option-d/If "\1" != "\2"/ + +/if /s/if \[ \([^ ]*\) -eq \([^ ]*\) ] *; *\\Option-d/If "\1" == "\2"/ +/if /s/if \[ \([^ ]*\) -eq \([^ ]*\) ] *; *then *\\Option-d/If "\1" == "\2"/ + +/^[ ]*else true$/c\ + Else\ + mpw-true\ + + +/else/s/^\([ ]*\)else[ ]*$/\1Else/ +/else/s/^\([ ]*\)else[; ]*\\Option-d$/\1Else/ + +/^[ ]*else[ ]*true[ ]*$/c\ + Else\ + mpw-true + +/^[ ]*else[ ]*true[; ]*fi$/c\ + Else\ + mpw-true\ + End If + +/fi/s/^\([ ]*\)fi *$/\1End/ +/fi/s/^\([ ]*\)fi *; *\\Option-d/\1End/ + +# Change looping. +/for/s/^\([ ]*\)for \([-a-zA-Z0-9_]*\) in \([^;]*\); *do *\\Option-d/\1For \2 In \3/ +/^\([ ]*\)do *\\Option-d/d +/done/s/^\([ ]*\)done *; *\\Option-d/\1End/ +/done/s/^\([ ]*\)done$/\1End/ + +# Trailing semicolons and continued lines are unneeded sh syntax. +/; \\Option-d/s/; \\Option-d// + +# Change move-if-change to MoveIfChange. +/move-if-change/s/\([^ ]*\)move-if-change/MoveIfChange/g + +# Change $(SHELL) to the script name by itself. +/SHELL/s/^\([ ]*\){SHELL} /\1/ + +# Change syntax of default rule dependency. +/^\.c\.o/s/^\.c\.o \\Option-f$/.c.o \\Option-f .c/ + +# Change default rule's action.
+/{CC} -c/s/{CC} -c \(.*\) \$<$/{CC} @DASH_C_FLAG@ {DepDir}{Default}.c \1 @SEGMENT_FLAG({Default})@ -o {TargDir}{Default}.c.o/ + +# This is pretty disgusting, but I can't seem to detect empty rules. +/Option-f$/s/Option-f$/Option-f _oldest/g + +# Remove -c from explicit compiler calls. (but should not if GCC) +# Handle the case of a source file that is "{xxx}"file.c. +/ -c /s/{\([A-Z_]*\)CC}\(.*\) -c \(.*\)"\([^"]*\)"\([-a-z_]*\)\.c/{\1CC}\2 @DASH_C_FLAG@ \3"\4"\5.c -o "{o}"\5.c.o/ +# Handle the case of a source file that is "{xxx}"dir:file.c. +/ -c /s/{\([A-Z_]*\)CC}\(.*\) -c \(.*\)"\([^"]*\)"\([-a-z_]*\):\([-a-z_]*\)\.c/{\1CC}\2 @DASH_C_FLAG@ \3"\4"\5:\6.c -o "{o}"\6.c.o/ + +# Change linking cc to linking sequence. +/-o/s/^\([ ]*\){CC} \(.*\){\([A-Z_]*\)CFLAGS} \(.*\){LDFLAGS} \(.*\)-o \([^ ]*\) \(.*\)$/\1{CC_LD} \2 {\3CFLAGS} \4 {LDFLAGS} \5 -o \6{PROG_EXT} \7\ +\1{MAKEPEF} \6{PROG_EXT} -o \6 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\ +\1{REZ} "{s}"\6.r -o \6 -append -d PROG_NAME='"'\6'"' -d VERSION_STRING='"'{version}'"'/ +/-o/s/^\([ ]*\){CC} \(.*\){\([A-Z_]*\)CFLAGS} \(.*\)-o \([^ ]*\) \(.*\){LDFLAGS} \(.*\)$/\1{CC_LD} \2 {\3CFLAGS} \4 {LDFLAGS} \6 -o \5{PROG_EXT} \7\ +\1{MAKEPEF} \5{PROG_EXT} -o \5 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\ +\1{REZ} "{s}"\5.r -o \5 -append -d PROG_NAME='"'\5'"' -d VERSION_STRING='"'{version}'"'/ +/-o/s/^\([ ]*\){HOST_CC} \(.*\)-o \([^ ]*\) \(.*\)$/\1{HOST_CC_LD} \2 -o \3{PROG_EXT} \4\ +\1{MAKEPEF} \3{PROG_EXT} -o \3 {MAKEPEF_TOOL_FLAGS} {MAKEPEF_FLAGS}\ +\1{REZ} "{s}"\3.r -o \3 -append -d PROG_NAME='"'\3'"' -d VERSION_STRING='"'{version}'"'/ + +# Comment out .NOEXPORT rules. +/\.NOEXPORT/s/^\.NOEXPORT/#\.NOEXPORT/ +# Comment out .PHONY rules. +/\.PHONY/s/^\.PHONY/#\.PHONY/ +# Comment out .PRECIOUS rules. +/\.PRECIOUS/s/^\.PRECIOUS/#\.PRECIOUS/ +# Comment out .SUFFIXES rules. +/\.SUFFIXES/s/^\.SUFFIXES/#\.SUFFIXES/ + +# Set the install program appropriately. 
+/INSTALL/s/^INSTALL *= *`.*`:install.sh -c/INSTALL = Duplicate -y/ + +# Don't try to decide whether to use the tree's own tools. +/bison/s/`.*bison:bison.*`/bison -y/ +/byacc/s/`.*byacc:byacc.*`/byacc/ +/flex/s/`.*flex:flex.*`/flex/ + +# Turn transformed C comments in echo commands back into comments. +/echo/s,echo '\(.*\):\\Option-x\(.*\)\\Option-x:\(.*\)',echo '\1/*\2*/\3', + +# Whack out various clever expressions that search for tools, since +# the clever code is too /bin/sh specific. + +/^AR_FOR_TARGET = `/,/`$/c\ +AR_FOR_TARGET = ::binutils:ar\ + + +/^RANLIB_FOR_TARGET = `/,/`$/c\ +RANLIB_FOR_TARGET = ::binutils:ranlib\ + + +/^RANLIB_TEST_FOR_TARGET = /,/ranlib ] )$/c\ +RANLIB_TEST_FOR_TARGET = \ + + +/^EXPECT = `/,/`$/c\ +EXPECT = \ + + +/^RUNTEST = `/,/`$/c\ +RUNTEST = \ + + +/^CC_FOR_TARGET = `/,/`$/c\ +CC_FOR_TARGET = \ + + +/^CXX_FOR_TARGET = `/,/`$/c\ +CXX_FOR_TARGET = \ + + +/^CHILL_FOR_TARGET = `/,/`$/c\ +CHILL_FOR_TARGET = \ + + +/^CHILL_LIB = `/,/`$/c\ +CHILL_LIB = \ + +/sanit/s/{start-sanit...-[a-z0-9]*}// +/sanit/s/{end-sanit...-[a-z0-9]*}// + +# Add standard defines and default rules. +/^# srcdir/a\ +\ +s = "{srcdir}"\ +\ +o = :\ +\ +"{o}" \\Option-f : "{s}" + diff --git a/config/mpw/mpw-touch b/config/mpw/mpw-touch new file mode 100644 index 00000000000..c743a5122b5 --- /dev/null +++ b/config/mpw/mpw-touch @@ -0,0 +1,7 @@ +# "Touch" command. + +If "`Exists "{1}"`" != "" + SetFile -m . "{1}" +Else + Echo ' ' > "{1}" +End If diff --git a/config/mpw/mpw-true b/config/mpw/mpw-true new file mode 100644 index 00000000000..0506530d3c6 --- /dev/null +++ b/config/mpw/mpw-true @@ -0,0 +1 @@ +Exit 0 diff --git a/config/mpw/null-command b/config/mpw/null-command new file mode 100644 index 00000000000..4844c8ec553 --- /dev/null +++ b/config/mpw/null-command @@ -0,0 +1 @@ +# This command does nothing. 
diff --git a/config/mpw/open-brace b/config/mpw/open-brace new file mode 100644 index 00000000000..58465dcc18c --- /dev/null +++ b/config/mpw/open-brace @@ -0,0 +1,4 @@ +# MPW makefiles seem not to have any way to get a literal open +# brace into a rule anywhere, so this does the job. + +Echo '{' diff --git a/config/mpw/tr-7to8-src b/config/mpw/tr-7to8-src new file mode 100644 index 00000000000..b20b649c895 --- /dev/null +++ b/config/mpw/tr-7to8-src @@ -0,0 +1,9 @@ +StreamEdit -e \Option-d + '/\Option-x/ \Option-d + Replace /\Option-d\SrcOption-d/ "\Option-d\Option-d" -c \Option-5 ; \Option-d + Replace /\Option-d\SrcOption-f/ "\Option-d\Option-f" -c \Option-5 ; \Option-d + Replace /\Option-d\SrcOption-8/ "\Option-d\Option-8" -c \Option-5 ; \Option-d + Replace /\Option-d\SrcOption-5/ "\Option-d\Option-5" -c \Option-5 ; \Option-d + Replace /\Option-d\SrcOption-x/ "\Option-d\Option-x" -c \Option-5 ; \Option-d + Replace /\Option-d\SrcOption-r/ "\Option-d\Option-r" -c \Option-5' \Option-d + "{1}" diff --git a/config/mpw/true b/config/mpw/true new file mode 100644 index 00000000000..0506530d3c6 --- /dev/null +++ b/config/mpw/true @@ -0,0 +1 @@ +Exit 0 diff --git a/gcc/ChangeLog.hammer b/gcc/ChangeLog.hammer new file mode 100644 index 00000000000..49688b838ab --- /dev/null +++ b/gcc/ChangeLog.hammer @@ -0,0 +1,109 @@ +Wed Nov 13 12:08:08 CET 2002 Jan Hubicka + Merge i386 specific optimizations for 3.4-BIB branch. + + Sat Nov 9 00:10:54 CET 2002 Jan Hubicka + + * i386.c (x86_machine_dependent_reorg): Fix even more side cases. + + Fri Nov 8 13:33:58 CET 2002 Jan Hubicka + + * i386.md (sse_loadss, sse2_loadsd): Fix expander. + + Fri Nov 8 13:25:41 CET 2002 Jan Hubicka + + * i386.c (x86_machine_dependent_reorg): Fix handling of empty functions. + + Fri Nov 8 11:36:11 CET 2002 Jan Hubicka + + * i386.md (sse_movdfcc, sse_movsfcc): Fix typo in previous patch. + + Thu Nov 7 21:54:22 CET 2002 Jan Hubicka + + * i386.md (sse_movdfcc, sse_movsfcc): Avoid overactive matching. 
+ * i386.c (ix86_expand_fp_movcc): Match the reversed cases. + + Tue Nov 5 14:34:36 CET 2002 Jan Hubicka + + * i386.md (float_truncate SSE splitter): Ensure that operand is not + stack register. + (float SSE splitters): Reorder conditional. + + Thu Oct 31 18:20:50 CET 2002 Jan Hubicka + + * i386.md (sse_loadss, sse_loadsd): Canonicalize; add expander + (movps, movpd splitters): Use canonical form. + (movv2di): Fix merge problem. + + Wed Nov 6 17:16:48 CET 2002 Jan Hubicka + + * i386.md (negsf splitter): Accept memory operand in second register. + (abssf/absdf splitters): Simplify + (sse_loadss, sse_loadsd): Turn into expander. + + Thu Oct 31 16:09:44 CET 2002 Jan Hubicka + + * i386.md (negdf2_ifs_rex64): Don't allow GPR operand. + + Tue Oct 29 23:28:10 CET 2002 Jan Hubicka + + * i386.md (negdf splitter): Fix construction of the constant. + + Tue Oct 29 20:47:06 CET 2002 Jan Hubicka + + * i386.md (negsf, negdf): Reorganize to use vector modes + for SSE variants. + (abssf, absdf): Use force_reg. + (movv4sf, movv2df): New splitters. + * i386.h (PREDICATE_CODES): add zero_extended_scalar_load_operand + * i386.c (zero_extended_scalar_load_operand + + Wed Oct 23 22:48:44 CEST 2002 Jan Hubicka + + * i386.md (abs splitters): Do not produce nested subregs. + + Wed Oct 23 12:42:32 CEST 2002 Jan Hubicka + + * i386.md (movti_rex64): Fix constraints. + + Wed Oct 23 12:01:21 CEST 2002 Jan Hubicka + + * i386.md (abssf,absdf): Use vector operands for SSE + (abssf2_ifs, absdf2_ifs, absdf2_ifs_rex64 and splitters): Update for + vector operand. + + Wed Oct 9 21:18:43 CEST 2002 Jan Hubicka + + * i386.c (*_cost): Add branch costs. + (override_options): set ix86_branch_cost. + (ix86_expand_int_movcc): Use BRANCH_COST. + * i386.h (costs): Add branch_cost. + + Tue Oct 8 01:24:19 CEST 2002 Jan Hubicka + + * i386.c (x86_sse_partial_reg_dependency, x86_sse_partial_regs, + x86_sse_typeless_stores, x86_sse_load0_by_pxor): New global + variables. 
+ (safe_vector_operand): Update sse_clrv4sf call. + (ix86_expand_buildin): Likewise + * i386.h (x86_sse_partial_reg_dependency, x86_sse_partial_regs, + x86_sse_typeless_stores, x86_sse_load0_by_pxor): Declare. + (TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REGS, + TARGET_SSE_TYPELESS_STORES, TARGET_SSE_TYPELESS_LOAD0): New + macros. + * i386.md (movsf*, movdf*, movti, movv4sf, movv2df, movv16qi, movv8hi, + movv4si): Obey the new flags. + (floatsi2sf, floatdi2sf, truncatedf2sf): Emit extra load of 0 to avoid + reformating penalty. + (anddf, cmov patterns): Avoid reformating by first converting. + (sse_cvtsd2ss): Fix predicate. + (sse2_clrti): Fix mode, + (sse_clrv4sf): Avoid unspec. + + Sat Oct 5 22:48:06 CEST 2002 Jan Hubicka + + * athlon.md: rewrite to DFA. + * i386 (ix86_adjust_cost): Drop memory latency code. + (ia32_use_dfa_pipeline_interface): Return true for Athlon. +Thu Nov 7 11:18:01 CET 2002 Jan Hubicka + + * reg-stack.c (compensate_edge): Fix sanity check. diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md index d6a52f2cbdd..7113cb88345 100644 --- a/gcc/config/i386/athlon.md +++ b/gcc/config/i386/athlon.md @@ -1,34 +1,5 @@ ;; AMD Athlon Scheduling -;; Copyright (C) 2002 Free Software Foundation, Inc. ;; -;; This file is part of GNU CC. -;; -;; GNU CC is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. -;; -;; GNU CC is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. -;; -;; You should have received a copy of the GNU General Public License -;; along with GNU CC; see the file COPYING. 
If not, write to -;; the Free Software Foundation, 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. */ -(define_attr "athlon_decode" "direct,vector" - (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") - (const_string "vector") - (and (eq_attr "type" "push") - (match_operand 1 "memory_operand" "")) - (const_string "vector") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF"))) - (const_string "vector")] - (const_string "direct"))) - ;; The Athlon does contain three pipelined FP units, three integer units and ;; three address generation units. ;; @@ -46,161 +17,419 @@ ;; The load/store queue unit is not attached to the schedulers but ;; communicates with all the execution units separately instead. -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "vector")) - 1 1) - -(define_function_unit "athlon_directdec" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1 [(eq_attr "athlon_decode" "vector")]) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,rotate,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) - 1 1) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "str")) - 15 15) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 42) - -(define_attr "athlon_fpunits" 
"none,store,mul,add,muladd,any" - (cond [(eq_attr "type" "fop,fcmp,fistp") - (const_string "add") - (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") - (const_string "mul") - (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) - (const_string "store") - (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) - (const_string "any") +(define_attr "athlon_decode" "direct,vector" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") (and (eq_attr "type" "fmov") - (ior (match_operand:SI 1 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) - (const_string "store") - (eq_attr "type" "fmov") - (const_string "muladd")] - (const_string "none"))) - -;; We use latencies 1 for definitions. This is OK to model colisions -;; in execution units. The real latencies are modeled in the "fp" pipeline. - -;; fsin, fcos: 96-192 -;; fsincos: 107-211 -;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fpspc")) - 100 1) - -;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fdiv")) - 24 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fop,fmul,fistp")) - 4 1) - -;; XFmode loads are slow. -;; XFmode store is slow too (8 cycles), but we don't need to model it, because -;; there are no dependent instructions. 
+ (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF"))) + (const_string "vector")] + (const_string "direct"))) -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load") - (eq_attr "mode" "XF")))) - 10 1) +;; +;; decode0 decode1 decode2 +;; \ | / +;; instruction control unit (72 entry scheduler) +;; | | +;; integer scheduler (18) stack map +;; / | | | | \ stack rename +;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler +;; | agu0 | agu1 agu2 register file +;; | \ | | / | | | +;; \ /\ | / fadd fmul fstore +;; \ / \ | / fadd fmul fstore +;; imul load/store (2x) fadd fmul fstore -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fmov,fsgn")) - 2 1) +(define_automaton "athlon,athlon_mult,athlon_fp") +(define_cpu_unit "athlon-decode0" "athlon") +(define_cpu_unit "athlon-decode1" "athlon") +(define_cpu_unit "athlon-decode2" "athlon") +(define_reservation "athlon-vector" "(athlon-decode0 + athlon-decode1 + + athlon-decode2)") +(define_reservation "athlon-direct" "(athlon-decode0 | athlon-decode1 + | athlon-decode2)") -;; fcmp and ftst instructions -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "direct"))) - 3 1) +;; Agu and ieu unit results in extremely large automatons and +;; in our approximation they are hardly filled in. Only ieu +;; unit can, as issue rate is 3 and agu unit is always used +;; first in the insn reservations. Skip the models. -;; fcmpi instructions.
-(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "vector"))) - 3 1) +;(define_cpu_unit "athlon-ieu0" "athlon_ieu") +;(define_cpu_unit "athlon-ieu1" "athlon_ieu") +;(define_cpu_unit "athlon-ieu2" "athlon_ieu") +;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)") +(define_reservation "athlon-ieu" "nothing") +;(define_cpu_unit "athlon-agu0" "athlon_agu") +;(define_cpu_unit "athlon-agu1" "athlon_agu") +;(define_cpu_unit "athlon-agu2" "athlon_agu") +;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)") +(define_reservation "athlon-agu" "nothing,nothing") -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fcmov")) - 7 1) +(define_cpu_unit "athlon-mult" "athlon_mult") -(define_function_unit "athlon_fp_mul" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "mul")) - 1 1) +(define_cpu_unit "athlon-load0" "athlon") +(define_cpu_unit "athlon-load1" "athlon") +(define_reservation "athlon-load" "athlon-agu, + (athlon-load0 | athlon-load1)") +(define_reservation "athlon-store" "nothing") -(define_function_unit "athlon_fp_add" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "add")) - 1 1) +;; The three fp units are fully pipelined with latency of 3 +(define_cpu_unit "athlon-fadd" "athlon_fp") +(define_cpu_unit "athlon-fmul" "athlon_fp") +(define_cpu_unit "athlon-fstore" "athlon_fp") +(define_reservation "athlon-fany" "(athlon-fadd | athlon-fmul | athlon-fstore)") +(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)") -(define_function_unit "athlon_fp_muladd" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "muladd,mul,add")) - 1 1) -(define_function_unit "athlon_fp_store" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "store")) - 1 1) +;; Jump instructions are executed in the branch unit completely transparent to us
+(define_insn_reservation "athlon_branch" 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ibr")) + "athlon-direct") +(define_insn_reservation "athlon_call" 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "call,callv")) + "athlon-vector") -;; We don't need to model the Address Generation Unit, since we don't model -;; the re-order buffer yet and thus we never schedule more than three operations -;; at time. Later we may want to experiment with MD_SCHED macros modeling the -;; decoders independently on the functional units. +;; Latency of push operation is 3 cycles, but ESP value is available +;; earlier +(define_insn_reservation "athlon_push" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "push")) + "athlon-direct,nothing,athlon-store") +(define_insn_reservation "athlon_pop" 4 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "pop")) + "athlon-vector,athlon-ieu,athlon-load") +(define_insn_reservation "athlon_leave" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "push")) + "athlon-vector,athlon-load") -;(define_function_unit "athlon_agu" 3 0 -; (and (eq_attr "cpu" "athlon") -; (and (eq_attr "memory" "!none") -; (eq_attr "athlon_fpunits" "none"))) -; 1 1) +;; Lea executes in AGU unit with 2 cycles latency. +(define_insn_reservation "athlon_lea" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "lea")) + "athlon-direct,athlon-agu") -;; Model load unit to avoid too long sequences of loads. We don't need to -;; model store queue, since it is hardly going to be bottleneck.
+;; Mul executes in special multiplier unit attached to IEU0 +(define_insn_reservation "athlon_imul" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imul") + (eq_attr "memory" "none,unknown"))) + "athlon-vector,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu") +(define_insn_reservation "athlon_imul_mem" 8 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imul") + (eq_attr "memory" "load,both"))) + "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu") +(define_insn_reservation "athlon_idiv" 42 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "idiv") + (eq_attr "memory" "none,unknown"))) + "athlon-vector,athlon-ieu*42") +(define_insn_reservation "athlon_idiv_mem" 45 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "idiv") + (eq_attr "memory" "load,both"))) + "athlon-vector,athlon-load,athlon-ieu*42") +(define_insn_reservation "athlon_str" 15 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both,store"))) + "athlon-vector,athlon-load,athlon-ieu*10") -(define_function_unit "athlon_load" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "memory" "load,both")) - 1 1) +(define_insn_reservation "athlon_idirect" 1 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-direct,athlon-ieu") +(define_insn_reservation "athlon_ivector" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "none,unknown")))) + "athlon-vector,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_idirect_loadmov" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load") +(define_insn_reservation "athlon_idirect_load" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr 
"memory" "load")))) + "athlon-direct,athlon-load,athlon-ieu") +(define_insn_reservation "athlon_ivector_load" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-ieu") +(define_insn_reservation "athlon_idirect_movstore" 1 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "imov") + (eq_attr "memory" "store"))) + "athlon-direct,athlon-agu,athlon-store") +(define_insn_reservation "athlon_idirect_both" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-direct,athlon-load,athlon-ieu, + athlon-store") +(define_insn_reservation "athlon_ivector_both" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "both")))) + "athlon-vector,athlon-load,athlon-ieu,athlon-ieu, + athlon-store") +(define_insn_reservation "athlon_idirect_store" 1 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "direct") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-direct,athlon-ieu, + athlon-store") +(define_insn_reservation "athlon_ivector_store" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "vector") + (and (eq_attr "unit" "integer,unknown") + (eq_attr "memory" "store")))) + "athlon-vector,athlon-ieu,athlon-ieu, + athlon-store") +;; Athlon floatin point unit +(define_insn_reservation "athlon_fldxf" 12 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fany") +(define_insn_reservation "athlon_fld" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fany,nothing,athlon-load") +(define_insn_reservation "athlon_fstxf" 10 + (and (eq_attr 
"cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "store,both") + (eq_attr "mode" "XF")))) + "athlon-vector,athlon-fstore") +(define_insn_reservation "athlon_fst" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "store,both"))) + "athlon-direct,athlon-fstore,nothing,athlon-store") +(define_insn_reservation "athlon_fist" 4 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fistp")) + "athlon-direct,athlon-fstore,nothing") +(define_insn_reservation "athlon_fmov" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fmov")) + "athlon-direct,athlon-faddmul") +(define_insn_reservation "athlon_fadd_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fop") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_fadd" 4 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fop")) + "athlon-direct,athlon-fadd") +(define_insn_reservation "athlon_fmul_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_fmul" 4 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fmul")) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_fsgn" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fsgn")) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_fdiv_load" 24 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fdiv") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_fdiv" 24 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_fpspc_load" 103 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fpspc") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_fpspc" 100 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fpspc")) + 
"athlon-vector,athlon-fmul") +(define_insn_reservation "athlon_fcmov_load" 10 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmov") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_fcmov" 7 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + "athlon-vector,athlon-fmul") +(define_insn_reservation "athlon_fcomi_load" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_fcomi" 3 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "athlon_decode" "vector") + (eq_attr "type" "fcmp"))) + "athlon-vector,athlon-fadd") +(define_insn_reservation "athlon_fcom_load" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_fcom" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmp")) + "athlon-direct,athlon-fadd") +(define_insn_reservation "athlon_fxch" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fxch")) + "athlon-direct,athlon-fany") +;; Athlon handle MMX operations in the FPU unit with shorter latencies +(define_insn_reservation "athlon_mmxsseld" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-fany,athlon-load") +(define_insn_reservation "athlon_mmxssest" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "mmxmov,ssemov") + (eq_attr "memory" "store,both"))) + "athlon-direct,athlon-store") +(define_insn_reservation "athlon_mmxssemov" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "mmxmov,ssemov")) + "athlon-direct,athlon-faddmul") +(define_insn_reservation "athlon_mmxmul_load" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "mmxmul") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-fmul") 
+(define_insn_reservation "athlon_mmxmul" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "mmxmul")) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_mmx_load" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "unit" "mmx") + (eq_attr "memory" "load"))) + "athlon-direct,athlon-load,athlon-faddmul") +(define_insn_reservation "athlon_mmx" 2 + (and (eq_attr "cpu" "athlon") + (eq_attr "unit" "mmx")) + "athlon-direct,athlon-faddmul") +;; SSE operations are handled by the i387 unit as well. The latency +;; is the same as for i387 operations for scalar operations +(define_insn_reservation "athlon_sselog_load" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_sselog" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "sselog")) + "athlon-vector,athlon-fmul") +(define_insn_reservation "athlon_ssecmp_load" 5 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecmp") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-vector,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_ssecmp" 2 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecmp") + (eq_attr "mode" "SF,DF"))) + "athlon-direct,athlon-fadd") +(define_insn_reservation "athlon_ssecmpvector_load" 6 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssecmp") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_ssecmpvector" 3 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssecmp")) + "athlon-vector,athlon-fadd") +(define_insn_reservation "athlon_sseadd_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseadd") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_sseadd" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseadd") + (eq_attr "mode" "SF,DF"))) +
"athlon-direct,athlon-fadd") +(define_insn_reservation "athlon_sseaddvector_load" 8 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "sseadd") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fadd") +(define_insn_reservation "athlon_sseaddvector" 5 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "sseadd")) + "athlon-vector,athlon-fadd") +(define_insn_reservation "athlon_ssemul_load" 7 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemul") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_ssemul" 4 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF,DF"))) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_ssemulvector_load" 8 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssemul") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_ssemulvector" 5 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssemul")) + "athlon-vector,athlon-fmul") +(define_insn_reservation "athlon_ssediv_load" 19 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssediv") + (and (eq_attr "mode" "SF,DF") + (eq_attr "memory" "load")))) + "athlon-direct,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_ssediv" 16 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssediv") + (eq_attr "mode" "SF,DF"))) + "athlon-direct,athlon-fmul") +(define_insn_reservation "athlon_ssedivvector_load" 32 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "ssediv") + (eq_attr "memory" "load"))) + "athlon-vector,athlon-load,athlon-fmul") +(define_insn_reservation "athlon_ssedivvector" 29 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "ssediv")) + "athlon-vector,athlon-fmul") diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b02fc4a2092..9e400cdfcd5 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -84,6 +84,7 @@ struct 
processor_costs size_cost = { /* costs for tunning for size */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 1, /* Branch cost */ 2, /* cost of FADD and FSUB insns. */ 2, /* cost of FMUL instruction. */ 2, /* cost of FDIV instruction. */ @@ -128,6 +129,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 1, /* Branch cost */ 23, /* cost of FADD and FSUB insns. */ 27, /* cost of FMUL instruction. */ 88, /* cost of FDIV instruction. */ @@ -171,6 +173,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */ 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 1, /* Branch cost */ 8, /* cost of FADD and FSUB insns. */ 16, /* cost of FMUL instruction. */ 73, /* cost of FDIV instruction. */ @@ -214,6 +217,7 @@ struct processor_costs pentium_cost = { 3, /* MMX or SSE register to integer */ 0, /* size of prefetch block */ 0, /* number of parallel prefetches */ + 2, /* Branch cost */ 3, /* cost of FADD and FSUB insns. */ 3, /* cost of FMUL instruction. */ 39, /* cost of FDIV instruction. */ @@ -257,6 +261,7 @@ struct processor_costs pentiumpro_cost = { 3, /* MMX or SSE register to integer */ 32, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 2, /* Branch cost */ 3, /* cost of FADD and FSUB insns. */ 5, /* cost of FMUL instruction. */ 56, /* cost of FDIV instruction. */ @@ -300,6 +305,7 @@ struct processor_costs k6_cost = { 6, /* MMX or SSE register to integer */ 32, /* size of prefetch block */ 1, /* number of parallel prefetches */ + 1, /* Branch cost */ 2, /* cost of FADD and FSUB insns. */ 2, /* cost of FMUL instruction. */ 56, /* cost of FDIV instruction. 
*/ @@ -343,6 +349,7 @@ struct processor_costs athlon_cost = { 5, /* MMX or SSE register to integer */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 2, /* Branch cost */ 4, /* cost of FADD and FSUB insns. */ 4, /* cost of FMUL instruction. */ 24, /* cost of FDIV instruction. */ @@ -355,11 +362,11 @@ static const struct processor_costs pentium4_cost = { 1, /* cost of an add instruction */ 1, /* cost of a lea instruction */ - 8, /* variable shift costs */ - 8, /* constant shift costs */ - 30, /* cost of starting a multiply */ + 4, /* variable shift costs */ + 4, /* constant shift costs */ + 15, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ - 112, /* cost of a divide/mod */ + 56, /* cost of a divide/mod */ 1, /* cost of movsx */ 1, /* cost of movzx */ 16, /* "large" insn */ @@ -386,6 +393,7 @@ struct processor_costs pentium4_cost = { 10, /* MMX or SSE register to integer */ 64, /* size of prefetch block */ 6, /* number of parallel prefetches */ + 2, /* Branch cost */ 5, /* cost of FADD and FSUB insns. */ 7, /* cost of FMUL instruction. */ 43, /* cost of FDIV instruction. */ @@ -445,6 +453,13 @@ const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO; const int x86_decompose_lea = m_PENT4; const int x86_shift1 = ~m_486; const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4; +const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO; +/* Set for machines where the type and dependencies are resolved on SSE register + parts instead of whole registers, so we may maintain just lower part of + scalar values in proper format leaving the upper part undefined.
*/ +const int x86_sse_partial_regs = m_ATHLON; +const int x86_sse_typeless_stores = m_ATHLON; +const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4; /* In case the avreage insn count for single function invocation is lower than this constant, emit fast (but longer) prologue and @@ -934,17 +949,16 @@ override_options () const int align_jump; const int align_jump_max_skip; const int align_func; - const int branch_cost; } const processor_target_table[PROCESSOR_max] = { - {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1}, - {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1}, - {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1}, - {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1}, - {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1}, - {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1}, - {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1} + {&i386_cost, 0, 0, 4, 3, 4, 3, 4}, + {&i486_cost, 0, 0, 16, 15, 16, 15, 16}, + {&pentium_cost, 0, 0, 16, 7, 16, 7, 16}, + {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16}, + {&k6_cost, 0, 0, 32, 7, 32, 7, 32}, + {&athlon_cost, 0, 0, 16, 7, 64, 7, 16}, + {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0} }; static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES; @@ -1212,7 +1226,7 @@ override_options () } /* Validate -mbranch-cost= value, or provide default. */ - ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost; + ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost; if (ix86_branch_cost_string) { i = atoi (ix86_branch_cost_string); @@ -3401,6 +3415,34 @@ non_q_regs_operand (op, mode) return NON_QI_REG_P (op); } +/* Return 1 if OP is a constant-pool load of a vector whose elements + other than element 0 are all zero.
*/ +int +zero_extended_scalar_load_operand (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + unsigned n_elts; + if (GET_CODE (op) != MEM) + return 0; + op = maybe_get_pool_constant (op); + if (!op) + return 0; + if (GET_CODE (op) != CONST_VECTOR) + return 0; + n_elts = + (GET_MODE_SIZE (GET_MODE (op)) / + GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op)))); + for (n_elts--; n_elts > 0; n_elts--) + { + rtx elt = CONST_VECTOR_ELT (op, n_elts); + if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op)))) + return 0; + } + return 1; +} + + /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */ int @@ -9296,12 +9338,9 @@ ix86_expand_int_movcc (operands) * This is reasonably steep, but branch mispredict costs are * high on modern cpus, so consider failing only if optimizing * for space. - * - * %%% Parameterize branch_cost on the tuning architecture, then - * use that. The 80386 couldn't care less about mispredicts. */ - if (!optimize_size && !TARGET_CMOVE) + if (!TARGET_CMOVE && BRANCH_COST >= 2) { if (cf == 0) { @@ -9379,7 +9418,7 @@ ix86_expand_int_movcc (operands) optab op; rtx var, orig_out, out, tmp; - if (optimize_size) + if (BRANCH_COST <= 2) return 0; /* FAIL */ /* If one of the two operands is an interesting constant, load a @@ -9514,8 +9553,14 @@ ix86_expand_fp_movcc (operands) if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1)) { /* Check for min operation. */ - if (code == LT) + if (code == LT || code == UNLE) { + if (code == UNLE) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } operands[0] = force_reg (GET_MODE (operands[0]), operands[0]); if (memory_operand (op0, VOIDmode)) op0 = force_reg (GET_MODE (operands[0]), op0); @@ -9526,8 +9571,14 @@ ix86_expand_fp_movcc (operands) return 1; } /* Check for max operation.
*/ - if (code == GT) + if (code == GT || code == UNGE) { + if (code == UNGE) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } operands[0] = force_reg (GET_MODE (operands[0]), operands[0]); if (memory_operand (op0, VOIDmode)) op0 = force_reg (GET_MODE (operands[0]), op0); @@ -11305,13 +11356,6 @@ ix86_adjust_cost (insn, link, dep_insn, cost) memory = get_attr_memory (insn); dep_memory = get_attr_memory (dep_insn); - if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH) - { - if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV) - cost += 2; - else - cost += 3; - } /* Show ability of reorder buffer to hide latency of load by executing in parallel with previous instruction in case previous instruction is not needed to compute the address. */ @@ -11585,7 +11629,7 @@ ix86_variable_issue (dump, sched_verbose, insn, can_issue_more) static int ia32_use_dfa_pipeline_interface () { - if (ix86_cpu == PROCESSOR_PENTIUM) + if (ix86_cpu == PROCESSOR_PENTIUM || ix86_cpu == PROCESSOR_ATHLON) return 1; return 0; } @@ -12795,7 +12839,8 @@ safe_vector_operand (x, mode) : gen_rtx_SUBREG (DImode, x, 0))); else emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? 
x - : gen_rtx_SUBREG (V4SFmode, x, 0))); + : gen_rtx_SUBREG (V4SFmode, x, 0), + CONST0_RTX (V4SFmode))); return x; } @@ -13465,7 +13510,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_SSE_ZERO: target = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_clrv4sf (target)); + emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode))); return target; case IX86_BUILTIN_MMX_ZERO: @@ -14319,21 +14364,27 @@ x86_machine_dependent_reorg (first) if (!returnjump_p (ret) || !maybe_hot_bb_p (bb)) continue; - prev = prev_nonnote_insn (ret); + for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) + if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL) + break; if (prev && GET_CODE (prev) == CODE_LABEL) { edge e; for (e = bb->pred; e; e = e->pred_next) - if (EDGE_FREQUENCY (e) && e->src->index > 0 + if (EDGE_FREQUENCY (e) && e->src->index >= 0 && !(e->flags & EDGE_FALLTHRU)) insert = 1; } if (!insert) { - prev = prev_real_insn (ret); + prev = prev_active_insn (ret); if (prev && GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev)) insert = 1; + /* Empty functions get branch mispredict even when the jump destination + is not visible to us. */ + if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) + insert = 1; } if (insert) emit_insn_before (gen_nop (), ret); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8e331765847..3971ef5cdc4 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -75,6 +75,7 @@ struct processor_costs { const int prefetch_block; /* bytes moved to cache for prefetch. */ const int simultaneous_prefetches; /* number of parallel prefetch operations. */ + const int branch_cost; /* Default value for BRANCH_COST. */ const int fadd; /* cost of FADD and FSUB instructions. */ const int fmul; /* cost of FMUL instruction. */ const int fdiv; /* cost of FDIV instruction.
*/ @@ -221,6 +222,8 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; extern const int x86_accumulate_outgoing_args, x86_prologue_using_move; extern const int x86_epilogue_using_move, x86_decompose_lea; extern const int x86_arch_always_fancy_math_387, x86_shift1; +extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs; +extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor; extern int x86_prefetch_sse; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) @@ -257,6 +260,12 @@ extern int x86_prefetch_sse; #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK) #define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK) #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK) +#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \ + (x86_sse_partial_reg_dependency & CPUMASK) +#define TARGET_SSE_PARTIAL_REGS (x86_sse_partial_regs & CPUMASK) +#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & CPUMASK) +#define TARGET_SSE_TYPELESS_LOAD0 (x86_sse_typeless_load0 & CPUMASK) +#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & CPUMASK) #define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK) #define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & CPUMASK) #define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & CPUMASK) @@ -3286,6 +3295,7 @@ do { \ {"register_and_not_any_fp_reg_operand", {REG}}, \ {"fp_register_operand", {REG}}, \ {"register_and_not_fp_reg_operand", {REG}}, \ + {"zero_extended_scalar_load_operand", {MEM}}, \ /* A list of predicates that do special things with modes, and so should not elicit warnings for VOIDmode match_operand. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4596cee7f9e..4ee6b71d382 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2133,12 +2133,12 @@ case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2 && !TARGET_ATHLON) + if (get_attr_mode (insn) == MODE_TI) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) + if (get_attr_mode (insn) == MODE_V4SF) return "movaps\t{%1, %0|%0, %1}"; else return "movss\t{%1, %0|%0, %1}"; @@ -2158,7 +2158,40 @@ } } [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") - (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. 
*/ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_insn "*swapsf" [(set (match_operand:SF 0 "register_operand" "+f") @@ -2319,25 +2352,78 @@ case 4: return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + return "movsd\t{%1, %0|%0, %1}"; default: abort(); } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. 
*/ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") @@ -2381,16 +2467,34 @@ return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: return "movsd\t{%1, %0|%0, %1}"; @@ -2399,7 +2503,42 @@ } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. 
*/ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -3706,7 +3845,7 @@ (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] - "TARGET_80387 && TARGET_SSE2" + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS" { switch (which_alternative) { @@ -3716,7 +3855,30 @@ else return "fst%z0\t%y0"; case 4: - return "cvtsd2ss\t{%1, %0|%0, %1}"; + return "#"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") + (set_attr "mode" "SF,SF,SF,SF,DF")]) + +(define_insn "*truncdfsf2_1_sse_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + 
return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 4: + return "#"; default: abort (); } @@ -3728,7 +3890,7 @@ [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] - "TARGET_80387 && TARGET_SSE2 + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) @@ -3747,7 +3909,30 @@ [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) -(define_insn "truncdfsf2_3" +(define_insn "*truncdfsf2_2_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return "#"; + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "ssecvt,fmov") + (set_attr "mode" "DF,SF")]) + +(define_insn "*truncdfsf2_3" [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] @@ -3765,11 +3950,20 @@ [(set (match_operand:SF 0 "register_operand" "=Y") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2" + "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS" "cvtsd2ss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "*truncdfsf2_sse_only_nooverlap" + [(set (match_operand:SF 0 "register_operand" "=&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS" + "#" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + (define_split [(set (match_operand:SF 0 "memory_operand" "") (float_truncate:SF @@ 
-3779,15 +3973,56 @@ [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") +; Avoid possible reformating penalty on the destination by first +; zeroing it out (define_split - [(set (match_operand:SF 0 "nonimmediate_operand" "") + [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" ""))) (clobber (match_operand 2 "" ""))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0]) + && !STACK_REG_P (operands[1])" + [(const_int 0)] +{ + rtx src, dest; + if (!TARGET_SSE_PARTIAL_REGS) + emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); + else + { + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + } + DONE; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] "TARGET_80387 && reload_completed - && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") + && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS" + [(const_int 0)] +{ + rtx src, dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. 
*/ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + DONE; +}) (define_split [(set (match_operand:SF 0 "register_operand" "") @@ -4491,6 +4726,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ss (dest, dest, operands[1])); + DONE; +}) + (define_expand "floatdisf2" [(set (match_operand:SF 0 "register_operand" "") (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] @@ -4529,6 +4780,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ssq (dest, dest, operands[1])); + DONE; +}) + (define_insn "floathidf2" [(set (match_operand:DF 0 "register_operand" "=f,f") (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] @@ -9266,12 +9533,15 @@ in register. 
*/ rtx reg = gen_reg_rtx (SFmode); rtx dest = operands[0]; + rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9289,8 +9559,8 @@ (define_insn "negsf2_ifs" [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) + (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9311,7 +9581,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9320,8 +9590,8 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) @@ -9400,7 +9670,7 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. 
*/ - rtx reg = gen_reg_rtx (DFmode); + rtx reg; #if HOST_BITS_PER_WIDE_INT >= 64 rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else @@ -9410,7 +9680,10 @@ operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9428,8 +9701,8 @@ (define_insn "negdf2_ifs" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9439,8 +9712,8 @@ (define_insn "*negdf2_ifs_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9451,7 +9724,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (neg:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9461,7 +9734,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use 
(match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0]) && (!TARGET_64BIT || FP_REG_P (operands[0]))" @@ -9472,7 +9745,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9484,14 +9757,18 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) (xor:TI (subreg:TI (match_dup 1) 0) (subreg:TI (match_dup 2) 0)))] { + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9724,14 +10001,18 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. 
*/ - rtx reg = gen_reg_rtx (SFmode); + rtx reg = gen_reg_rtx (V4SFmode); rtx dest = operands[0]; + rtx imm; operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode)); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9748,9 +10029,9 @@ "#") (define_insn "abssf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9761,7 +10042,7 @@ (define_split [(set (match_operand:SF 0 "memory_operand" "") (abs:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9771,7 +10052,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9780,13 +10061,22 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (abs:SF (match_operand:SF 1 "nonimmediate_operand" 
""))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] +{ + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9849,17 +10139,22 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg = gen_reg_rtx (V2DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); + rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode); #else - rtx imm = immed_double_const (0, 0x80000000, DImode); + rtx imm = immed_double_const (~0, ~0x80000000, DImode); #endif rtx dest = operands[0]; operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + + /* Produce LONG_DOUBLE with the proper immediate argument. 
*/ + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9876,9 +10171,9 @@ "#") (define_insn "absdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9887,9 +10182,9 @@ "#") (define_insn "*absdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9900,7 +10195,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (abs:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9910,7 +10205,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9919,13 +10214,26 @@ (define_split 
[(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (abs:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] +{ + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems @@ -16547,6 +16855,12 @@ (clobber (reg:CC 17))] "TARGET_SSE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16574,6 +16888,12 @@ (clobber (reg:CC 17))] "TARGET_SSE2 && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. 
*/ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16635,6 +16955,14 @@ (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) (subreg:TI (match_dup 7) 0)))] { + if (GET_MODE (operands[2]) == DFmode + && TARGET_SSE_PARTIAL_REGS && !optimize_size) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + op = gen_rtx_SUBREG (V2DFmode, operands[3], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } /* If op2 == op3, op3 will be clobbered before it is used. This should be optimized out though. */ if (operands_match_p (operands[2], operands[3])) @@ -16743,6 +17071,20 @@ (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) (subreg:TI (match_dup 7) 0)))] { + if (TARGET_SSE_PARTIAL_REGS && !optimize_size + && GET_MODE (operands[2]) == DFmode) + { + if (REG_P (operands[2])) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + if (REG_P (operands[3])) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + } PUT_MODE (operands[1], GET_MODE (operands[0])); if (!sse_comparison_operator (operands[1], VOIDmode)) { @@ -17849,28 +18191,93 @@ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. 
"movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + operands[2] = CONST0_RTX (V4SFmode); +}) + (define_insn "movv4si_internal" [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv2di_internal" [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE" - ;; @@@ let's try to use movaps here. 
- "movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + operands[2] = CONST0_RTX (V2DFmode); +}) (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") @@ -17920,28 +18327,85 @@ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. 
- "movapd\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "V2DF")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "V2DF"))] + (const_string "V2DF")))]) (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. 
- "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_expand "movv2df" [(set (match_operand:V2DF 0 "general_operand" "") @@ -18158,26 +18622,83 @@ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") (match_operand:TI 1 "general_operand" "C,xm,x"))] "TARGET_SSE && !TARGET_64BIT" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))] "TARGET_64BIT && (GET_CODE (operands[0]) != 
MEM || GET_CODE (operands[1]) != MEM)" - "@ - # - # - xorps\t%0, %0 - movaps\\t{%1, %0|%0, %1} - movaps\\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18327,11 +18848,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18854,12 +19385,26 @@ ;; this insn. 
(define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] + (match_operand:V4SF 1 "const0_operand" "X"))] "TARGET_SSE" - "xorps\t{%0, %0|%0, %0}" +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t{%0, %0|%0, %0}"; + else + return "xorps\t{%0, %0|%0, %0}"; +} [(set_attr "type" "sselog") (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")))]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. @@ -19091,6 +19636,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -20718,7 +21275,7 @@ (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "register_operand" "xm"))) + (match_operand:V2DF 2 "nonimmediate_operand" "xm"))) (const_int 14)))] "TARGET_SSE2" "cvtsd2ss\t{%2, %0|%0, %2}" @@ -20730,7 +21287,7 @@ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "register_operand" "xm") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))) (const_int 2)))] @@ -21006,10 +21563,20 @@ (define_insn "sse2_clrti" [(set (match_operand:TI 0 "register_operand" "=x") (const_int 
0))] "TARGET_SSE2" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "sseiadd") +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; +} + [(set_attr "type" "ssemov") (set_attr "memory" "none") - (set_attr "mode" "TI")]) + (set (attr "mode") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")))]) ;; MMX unsigned averages/sum of absolute differences @@ -21714,11 +22281,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" diff --git a/gcc/config/i386/scodbx.h b/gcc/config/i386/scodbx.h deleted file mode 100644 index 7da93053256..00000000000 --- a/gcc/config/i386/scodbx.h +++ /dev/null @@ -1,84 +0,0 @@ -/* Definitions for Intel 386 running SCO Unix System V, - using dbx-in-coff encapsulation. - Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#include "i386/svr3dbx.h" - -/* Overridden defines for SCO systems from sco.h. */ - -/* By default, target has a 80387, uses IEEE compatible arithmetic, - and returns float values in the 387, ie, - (TARGET_80387 | TARGET_FLOAT_RETURNS_IN_80387) - - SCO's software emulation of a 387 fails to handle the `fucomp' - opcode. fucomp is only used when generating IEEE compliant code. - So don't make TARGET_IEEE_FP default for SCO. */ - -#undef TARGET_SUBTARGET_DEFAULT -#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_FLOAT_RETURNS) - -/* Use crt1.o as a startup file and crtn.o as a closing file. */ - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC \ - "%{!r:%{!z:svr3.ifile%s}%{z:svr3z.ifile%s}}\ - %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}" - -/* Library spec, including SCO international language support. */ - -#undef LIB_SPEC -#define LIB_SPEC \ - "%{p:-L/usr/lib/libp}%{pg:-L/usr/lib/libp} %{scointl:libintl.a%s} -lc" - -/* Specify predefined symbols in preprocessor. */ - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -DM_UNIX -DM_I386 -DM_COFF -DM_WORDSWAP -Asystem=svr3" - -#undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{scointl:-DM_INTERNAT}" - -/* This spec is used for telling cpp whether char is signed or not. */ - -#undef SIGNED_CHAR_SPEC -#if DEFAULT_SIGNED_CHAR -#define SIGNED_CHAR_SPEC \ - "%{funsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}" -#else -#define SIGNED_CHAR_SPEC \ - "%{!fsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}" -#endif - -/* caller has to pop the extra argument passed to functions that return - structures. */ - -#undef RETURN_POPS_ARGS -#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) \ - ((FUNDECL) && TREE_CODE (FUNDECL) == IDENTIFIER_NODE ? 
0 \ - : (TARGET_RTD \ - && (TYPE_ARG_TYPES (FUNTYPE) == 0 \ - || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE))) \ - == void_type_node))) ? (SIZE) \ - : 0) -/* On other 386 systems, the last line looks like this: - : (aggregate_value_p (TREE_TYPE (FUNTYPE))) ? GET_MODE_SIZE (Pmode) : 0) */ - -/* Handle #pragma pack. */ -#define HANDLE_SYSV_PRAGMA diff --git a/gcc/config/i386/t-darwin b/gcc/config/i386/t-darwin new file mode 100644 index 00000000000..51285570d68 --- /dev/null +++ b/gcc/config/i386/t-darwin @@ -0,0 +1,11 @@ +darwin.o: $(srcdir)/config/darwin.c $(CONFIG_H) $(SYSTEM_H) $(RTL_BASE_H) \ + $(REGS_H) hard-reg-set.h insn-config.h conditions.h output.h \ + insn-attr.h flags.h $(TREE_H) $(EXPR_H) reload.h \ + function.h $(GGC_H) $(TM_P_H) gt-darwin.h + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +darwin-c.o: $(srcdir)/config/darwin-c.c $(CONFIG_H) $(SYSTEM_H) \ + $(TREE_H) $(C_TREE_H) c-pragma.h toplev.h cpplib.h $(TM_P_H) + $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +gt-darwin.h : s-gtype ; @true diff --git a/gcc/config/i386/xm-dgux.h b/gcc/config/i386/xm-dgux.h deleted file mode 100644 index 881c5c7be9d..00000000000 --- a/gcc/config/i386/xm-dgux.h +++ /dev/null @@ -1,4 +0,0 @@ -/* Configuration for GCC for Intel i386 running DG/ux */ - -/* looks just like sysv4 for now */ -#include "xm-svr4.h" diff --git a/gcc/config/i386/xm-sun.h b/gcc/config/i386/xm-sun.h deleted file mode 100644 index 6c0f0a25630..00000000000 --- a/gcc/config/i386/xm-sun.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Configuration for GNU C-compiler for Intel 80386 running SunOS 4.0. - Copyright (C) 1988, 1997 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. 
- -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#define USG diff --git a/gcc/config/i386/xm-sysv3.h b/gcc/config/i386/xm-sysv3.h deleted file mode 100644 index 9a655443ff5..00000000000 --- a/gcc/config/i386/xm-sysv3.h +++ /dev/null @@ -1,3 +0,0 @@ -/* Configuration for GCC for Intel i386 running System V Release 3. */ - -#include "xm-svr3.h" diff --git a/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C b/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C deleted file mode 100644 index c27aa8d2df7..00000000000 --- a/gcc/testsuite/g++.old-deja/g++.robertl/eb42.C +++ /dev/null @@ -1,19 +0,0 @@ -//Build don't link: -#include -#include - -template class Expr -{ -public : -Expr(){}; -Expr(const T&){}; -}; - -template -inline bool compare(const Expr a, const Expr b){ return true; }; - -int main() -{ - std::vector a(3); - std::sort( a.begin(), a.end(), compare ); // ERROR - no matching function -} -- cgit v1.2.3