Diffstat (limited to 'gcc/config/i386')
 gcc/config/i386/cygwin.h       |  22
 gcc/config/i386/djgpp.h        |   2
 gcc/config/i386/freebsd-aout.h |  12
 gcc/config/i386/freebsd64.h    |   3
 gcc/config/i386/i386-protos.h  |   3
 gcc/config/i386/i386.c         | 362
 gcc/config/i386/i386.h         |  51
 gcc/config/i386/i386.md        | 665
 gcc/config/i386/linux64.h      |  19
 gcc/config/i386/mingw32.h      |   5
 gcc/config/i386/mmintrin.h     |  39
 gcc/config/i386/scodbx.h       |  84
 gcc/config/i386/t-sco5gas      |   2
 gcc/config/i386/xm-dgux.h      |   4
 gcc/config/i386/xm-sun.h       |  21
 gcc/config/i386/xm-sysv3.h     |   3
 gcc/config/i386/xmmintrin.h    | 115
 17 files changed, 1012 insertions(+), 400 deletions(-)
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
index f630d1f8450..03e372e04fa 100644
--- a/gcc/config/i386/cygwin.h
+++ b/gcc/config/i386/cygwin.h
@@ -1,6 +1,6 @@
/* Operating system specific defines to be used when targeting GCC for
hosting on Windows32, using a Unix style C library and tools.
- Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
Free Software Foundation, Inc.
This file is part of GNU CC.
@@ -134,7 +134,8 @@ Boston, MA 02111-1307, USA. */
by calling the init function from the prologue. */
#undef LIBGCC_SPEC
-#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} -lgcc %{mno-cygwin:-lmoldname -lmsvcrt}"
+#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} \
+ -lgcc %{mno-cygwin:-lmoldname -lmingwex -lmsvcrt}"
/* This macro defines names of additional specifications to put in the specs
that can be used in various specifications like CC1_SPEC. Its definition
@@ -306,11 +307,13 @@ do { \
#define CHECK_STACK_LIMIT 4000
/* By default, target has a 80387, uses IEEE compatible arithmetic,
- and returns float values in the 387 and needs stack probes */
-#undef TARGET_SUBTARGET_DEFAULT
+ returns float values in the 387 and needs stack probes.
+ We also align doubles to 64 bits for compatibility with the MSVC default. */
+#undef TARGET_SUBTARGET_DEFAULT
#define TARGET_SUBTARGET_DEFAULT \
- (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE)
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE \
+ | MASK_ALIGN_DOUBLE)
/* This is how to output an assembler line
that says to advance the location counter
@@ -397,6 +400,15 @@ extern void i386_pe_unique_section PARAMS ((TREE, int));
const0_rtx)); \
}
+/* Java Native Interface (JNI) methods on Win32 are invoked using the
+ stdcall calling convention. */
+#undef MODIFY_JNI_METHOD_CALL
+#define MODIFY_JNI_METHOD_CALL(MDECL) \
+ build_type_attribute_variant ((MDECL), \
+ build_tree_list (get_identifier ("stdcall"), \
+ NULL))
+
+
/* External function declarations. */
extern void i386_pe_record_external_function PARAMS ((const char *));
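A note on the MODIFY_JNI_METHOD_CALL hook added above: it rewrites the
type of a JNI method so that calls to it use the stdcall convention. A
minimal C sketch of the effect, with an invented method name (not from
the patch):

    /* The hook makes a call to a JNI method behave as if the method had
       been declared with the stdcall attribute:  */
    extern int __attribute__ ((stdcall)) Java_Foo_bar (void *env, void *obj);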
diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h
index a271aa47cde..67807804501 100644
--- a/gcc/config/i386/djgpp.h
+++ b/gcc/config/i386/djgpp.h
@@ -136,6 +136,8 @@ Boston, MA 02111-1307, USA. */
#undef ASM_FILE_START
#define ASM_FILE_START(FILE) \
do { \
+ if (ix86_asm_dialect == ASM_INTEL) \
+ fputs ("\t.intel_syntax\n", FILE); \
output_file_directive (FILE, main_input_filename); \
} while (0)
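With the ASM_FILE_START change above, -masm=intel on DJGPP now switches
GAS to Intel syntax before anything else is emitted. A sketch of the
expected start of the assembler output, assuming a source file foo.c (the
exact .file line comes from output_file_directive):

    	.intel_syntax
    	.file	"foo.c"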
diff --git a/gcc/config/i386/freebsd-aout.h b/gcc/config/i386/freebsd-aout.h
index a2b616e700b..85e2703f42c 100644
--- a/gcc/config/i386/freebsd-aout.h
+++ b/gcc/config/i386/freebsd-aout.h
@@ -1,6 +1,6 @@
/* Definitions of target machine for GNU compiler for Intel 80386
running FreeBSD.
- Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002
+ Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002, 2003
Free Software Foundation, Inc.
Contributed by Poul-Henning Kamp <phk@login.dkuug.dk>
Continued development by David O'Brien <obrien@NUXI.org>
@@ -94,6 +94,9 @@ Boston, MA 02111-1307, USA. */
/* Profiling routines, partially copied from i386/osfrose.h. */
+/* Tell final.c that we don't need a label passed to mcount. */
+#define NO_PROFILE_COUNTERS 1
+
#undef MCOUNT_NAME
#define MCOUNT_NAME "mcount"
#undef PROFILE_COUNT_REGISTER
@@ -112,6 +115,7 @@ Boston, MA 02111-1307, USA. */
#define TYPE_ASM_OP "\t.type\t"
#define SIZE_ASM_OP "\t.size\t"
+#define SET_ASM_OP "\t.set\t"
/* The following macro defines the format used to output the second
operand of the .type assembler directive. Different svr4 assemblers
@@ -121,6 +125,12 @@ Boston, MA 02111-1307, USA. */
#define TYPE_OPERAND_FMT "@%s"
+#define HANDLE_SYSV_PRAGMA 1
+
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+
/* Write the extra assembler code needed to declare a function's result.
Most svr4 assemblers don't require any special declaration of the
result value, but there are exceptions. */
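The new SET_ASM_OP and ASM_WEAKEN_LABEL definitions give this a.out
target the directives for .set-style aliases and weak symbols. For a
symbol foo (illustrative), ASM_WEAKEN_LABEL prints:

    	.weak	foo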
diff --git a/gcc/config/i386/freebsd64.h b/gcc/config/i386/freebsd64.h
index 699f4c4d344..12ca062301d 100644
--- a/gcc/config/i386/freebsd64.h
+++ b/gcc/config/i386/freebsd64.h
@@ -29,8 +29,7 @@ Boston, MA 02111-1307, USA. */
#undef LINK_SPEC
#define LINK_SPEC "\
- %{!m32:-m elf_x86_64} \
- %{m32:-m elf_i386} \
+ %{m32:-m elf_i386_fbsd} \
%{Wl,*:%*} \
%{v:-V} \
%{assert*} %{R*} %{rpath*} %{defsym*} \
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4afdf668bd8..d3c9d160190 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -190,6 +190,9 @@ extern void x86_function_profiler PARAMS ((FILE *, int));
#ifdef TREE_CODE
extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx));
extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int));
+extern int function_arg_pass_by_reference PARAMS ((CUMULATIVE_ARGS *,
+ enum machine_mode,
+ tree, int));
extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
tree, int));
extern rtx ix86_function_value PARAMS ((tree));
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 42f6d93d3c3..0f8c8e4c3fc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1,6 +1,6 @@
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002 Free Software Foundation, Inc.
+ 2002, 2003 Free Software Foundation, Inc.
This file is part of GNU CC.
@@ -799,6 +799,7 @@ const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
+static bool contains_128bit_aligned_vector_p PARAMS ((tree));
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
@@ -911,6 +912,12 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
struct gcc_target targetm = TARGET_INITIALIZER;
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
/* Sometimes certain combinations of command options do not make
sense on a particular target machine. You can define a macro
`OVERRIDE_OPTIONS' to take account of this. This macro, if
@@ -1021,7 +1028,7 @@ override_options ()
if (flag_asynchronous_unwind_tables == 2)
flag_asynchronous_unwind_tables = 0;
if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = 1;
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
}
#ifdef SUBTARGET_OVERRIDE_OPTIONS
@@ -2252,6 +2259,9 @@ function_arg (cum, mode, type, named)
break;
case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
case DImode:
case SImode:
case HImode:
@@ -2282,6 +2292,90 @@ function_arg (cum, mode, type, named)
return ret;
}
+/* Return true when TYPE should be 128-bit aligned for the 32-bit
+   argument passing ABI.  */
+static bool
+contains_128bit_aligned_vector_p (type)
+ tree type;
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ if (SSE_REG_MODE_P (mode)
+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+ return true;
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ if (TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ {
+ tree field;
+
+ if (TYPE_BINFO (type) != NULL
+ && TYPE_BINFO_BASETYPES (type) != NULL)
+ {
+ tree bases = TYPE_BINFO_BASETYPES (type);
+ int n_bases = TREE_VEC_LENGTH (bases);
+ int i;
+
+ for (i = 0; i < n_bases; ++i)
+ {
+ tree binfo = TREE_VEC_ELT (bases, i);
+ tree type = BINFO_TYPE (binfo);
+
+ if (contains_128bit_aligned_vector_p (type))
+ return true;
+ }
+ }
+ /* And now merge the fields of structure. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+ return true;
+ }
+ }
+ /* Just in case some language passes arrays by value. */
+ else if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+ return true;
+ }
+ else
+ abort ();
+ }
+ return false;
+}
+
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+int
+function_arg_pass_by_reference (cum, mode, type, named)
+ CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+ tree type;
+ int named ATTRIBUTE_UNUSED;
+{
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (type && int_size_in_bytes (type) == -1)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference\n");
+ return 1;
+ }
+
+ return 0;
+}
+
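function_arg_pass_by_reference above only fires on 64-bit targets, and
only for types whose size is unknown at compile time (int_size_in_bytes
returns -1, e.g. variable-sized types from front ends such as Ada). A
hedged sketch of the transformation it requests:

    /* For a variable-sized argument v:
         before:   callee (v);
         after:    copy = v;  callee (&copy);
       The callee reads the argument through the pointer, which is what
       the matching ix86_va_arg change below handles for varargs.  */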
/* Gives the alignment boundary, in bits, of an argument with the specified mode
and type. */
@@ -2291,14 +2385,34 @@ ix86_function_arg_boundary (mode, type)
tree type;
{
int align;
- if (!TARGET_64BIT)
- return PARM_BOUNDARY;
if (type)
align = TYPE_ALIGN (type);
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
+ if (!TARGET_64BIT)
+ {
+ /* The i386 ABI defines all arguments to be 4-byte aligned.  We have
+ to make an exception for SSE modes since these require 128-bit
+ alignment.
+
+ The handling here differs from field_alignment: ICC aligns MMX
+ arguments to 4-byte boundaries, while structure fields are aligned
+ to 8-byte boundaries. */
+ if (!type)
+ {
+ if (!SSE_REG_MODE_P (mode))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!contains_128bit_aligned_vector_p (type))
+ align = PARM_BOUNDARY;
+ }
+ if (align != PARM_BOUNDARY && !TARGET_SSE)
+ abort ();
+ }
if (align > 128)
align = 128;
return align;
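On 32-bit targets the boundary logic above keeps PARM_BOUNDARY (32 bits)
unless the argument has an SSE mode or is an aggregate containing a
128-bit aligned vector. A hedged C illustration of the two cases (type
names invented; __m128 needs <xmmintrin.h> and -msse):

    #include <xmmintrin.h>

    struct with_vec { int tag; __m128 v; };  /* contains a 128-bit vector:
                                                passed 16-byte aligned     */
    struct plain    { double d[2]; };        /* no vector member: plain
                                                4-byte argument alignment  */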
@@ -2488,6 +2602,8 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
/* Indicate to allocate space on the stack for varargs save area. */
ix86_save_varrargs_registers = 1;
+ cfun->stack_alignment_needed = 128;
+
fntype = TREE_TYPE (current_function_decl);
stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
&& (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
@@ -2637,6 +2753,7 @@ ix86_va_arg (valist, type)
rtx lab_false, lab_over = NULL_RTX;
rtx addr_rtx, r;
rtx container;
+ int indirect_p = 0;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT)
@@ -2656,6 +2773,13 @@ ix86_va_arg (valist, type)
sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
size = int_size_in_bytes (type);
+ if (size == -1)
+ {
+ /* Passed by reference. */
+ indirect_p = 1;
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ }
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
container = construct_container (TYPE_MODE (type), type, 0,
@@ -2760,10 +2884,12 @@ ix86_va_arg (valist, type)
{
int i;
rtx mem;
+ rtx x;
/* Never use the memory itself, as it has the alias set. */
- addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
- mem = gen_rtx_MEM (BLKmode, addr_rtx);
+ x = XEXP (assign_temp (type, 0, 1, 0), 0);
+ mem = gen_rtx_MEM (BLKmode, x);
+ force_operand (x, addr_rtx);
set_mem_alias_set (mem, get_varargs_alias_set ());
set_mem_align (mem, BITS_PER_UNIT);
@@ -2846,6 +2972,13 @@ ix86_va_arg (valist, type)
if (container)
emit_label (lab_over);
+ if (indirect_p)
+ {
+ r = gen_rtx_MEM (Pmode, addr_rtx);
+ set_mem_alias_set (r, get_varargs_alias_set ());
+ emit_move_insn (addr_rtx, r);
+ }
+
return addr_rtx;
}
@@ -3401,6 +3534,19 @@ non_q_regs_operand (op, mode)
return NON_QI_REG_P (op);
}
+/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
+int
+vector_move_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (nonimmediate_operand (op, mode))
+ return 1;
+ if (GET_MODE (op) != mode && mode != VOIDmode)
+ return 0;
+ return (op == CONST0_RTX (GET_MODE (op)));
+}
+
/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
insns. */
int
@@ -4225,7 +4371,8 @@ ix86_save_reg (regno, maybe_eh_return)
&& regno == REAL_PIC_OFFSET_TABLE_REGNUM
&& (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
|| current_function_profile
- || current_function_calls_eh_return))
+ || current_function_calls_eh_return
+ || current_function_uses_const_pool))
{
if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
return 0;
@@ -4516,6 +4663,10 @@ ix86_expand_prologue ()
CALL_INSN_FUNCTION_USAGE (insn)
= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
CALL_INSN_FUNCTION_USAGE (insn));
+
+ /* Don't allow the scheduling pass to move insns across the __alloca
+ call. */
+ emit_insn (gen_blockage (const0_rtx));
}
if (use_mov)
{
@@ -7484,12 +7635,12 @@ output_fp_compare (insn, operands, eflags_p, unordered_p)
if (unordered_p)
return "ucomiss\t{%1, %0|%0, %1}";
else
- return "comiss\t{%1, %0|%0, %y}";
+ return "comiss\t{%1, %0|%0, %1}";
else
if (unordered_p)
return "ucomisd\t{%1, %0|%0, %1}";
else
- return "comisd\t{%1, %0|%0, %y}";
+ return "comisd\t{%1, %0|%0, %1}";
}
if (! STACK_TOP_P (cmp_op0))
@@ -7777,9 +7928,17 @@ ix86_expand_move (mode, operands)
if (strict)
;
- else if (GET_CODE (op1) == CONST_DOUBLE
- && register_operand (op0, mode))
- op1 = validize_mem (force_const_mem (mode, op1));
+ else if (GET_CODE (op1) == CONST_DOUBLE)
+ {
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
+ {
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
+ }
+ }
}
}
@@ -7799,8 +7958,12 @@ ix86_expand_vector_move (mode, operands)
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], mode)
- && CONSTANT_P (operands[1]))
- operands[1] = force_const_mem (mode, operands[1]);
+ && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
+ {
+ operands[1] = force_const_mem (mode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
/* Make operand1 a register if it isn't already. */
if (!no_new_pseudos
@@ -9219,11 +9382,11 @@ ix86_expand_int_movcc (operands)
/* On x86_64 the lea instruction operates on Pmode, so we need
to get arithmetics done in proper mode to match. */
if (diff == 1)
- tmp = out;
+ tmp = copy_rtx (out);
else
{
rtx out1;
- out1 = out;
+ out1 = copy_rtx (out);
tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
nops++;
if (diff & 1)
@@ -9241,9 +9404,9 @@ ix86_expand_int_movcc (operands)
&& (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
{
if (nops == 1)
- out = force_operand (tmp, out);
+ out = force_operand (tmp, copy_rtx (out));
else
- emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
}
if (out != operands[0])
emit_move_insn (operands[0], copy_rtx (out));
@@ -9822,15 +9985,24 @@ ix86_split_long_move (operands)
Do an lea to the last part and use only one colliding move. */
else if (collisions > 1)
{
+ rtx base;
+
collisions = 1;
- emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
- XEXP (part[1][0], 0)));
- part[1][0] = change_address (part[1][0],
- TARGET_64BIT ? DImode : SImode,
- part[0][nparts - 1]);
- part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
+
+ base = part[0][nparts - 1];
+
+ /* Handle the case when the last part isn't valid for lea.
+ This happens in 64-bit mode when storing the 12-byte XFmode. */
+ if (GET_MODE (base) != Pmode)
+ base = gen_rtx_REG (Pmode, REGNO (base));
+
+ emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ part[1][0] = replace_equiv_address (part[1][0], base);
+ part[1][1] = replace_equiv_address (part[1][1],
+ plus_constant (base, UNITS_PER_WORD));
if (nparts == 3)
- part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
+ part[1][2] = replace_equiv_address (part[1][2],
+ plus_constant (base, 8));
}
}
@@ -10973,7 +11145,8 @@ memory_address_length (addr)
if (disp)
{
if (GET_CODE (disp) == CONST_INT
- && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
+ && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
+ && base)
len = 1;
else
len = 4;
@@ -11036,6 +11209,26 @@ ix86_attr_length_address_default (insn)
rtx insn;
{
int i;
+
+ if (get_attr_type (insn) == TYPE_LEA)
+ {
+ rtx set = PATTERN (insn);
+ if (GET_CODE (set) == SET)
+ ;
+ else if (GET_CODE (set) == PARALLEL
+ && GET_CODE (XVECEXP (set, 0, 0)) == SET)
+ set = XVECEXP (set, 0, 0);
+ else
+ {
+#ifdef ENABLE_CHECKING
+ abort ();
+#endif
+ return 0;
+ }
+
+ return memory_address_length (SET_SRC (set));
+ }
+
extract_insn_cached (insn);
for (i = recog_data.n_operands - 1; i >= 0; --i)
if (GET_CODE (recog_data.operand[i]) == MEM)
@@ -11834,7 +12027,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt)
#define def_builtin(MASK, NAME, TYPE, CODE) \
do { \
- if ((MASK) & target_flags) \
+ if ((MASK) & target_flags \
+ && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
NULL, NULL_TREE); \
} while (0)
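The extra test in def_builtin keeps any builtin whose mask includes
MASK_64BIT from being registered on 32-bit targets. For example, later in
this patch

    def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);

is only created when TARGET_64BIT is set, since MASK_SSE164 is defined
below as (MASK_SSE | MASK_SSE2 | MASK_64BIT).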
@@ -11851,6 +12045,8 @@ struct builtin_description
/* Used for builtins that are enabled both by -msse and -msse2. */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
+#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
static const struct builtin_description bdesc_comi[] =
{
@@ -11933,9 +12129,11 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
{ MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
{ MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
{ MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
{ MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
{ MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
+ { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
@@ -11984,6 +12182,7 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
@@ -12056,11 +12255,11 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
- { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+ { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
@@ -12134,6 +12333,7 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
@@ -12149,8 +12349,10 @@ static const struct builtin_description bdesc_1arg[] =
{ MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
@@ -12172,6 +12374,8 @@ static const struct builtin_description bdesc_1arg[] =
{ MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
@@ -12197,7 +12401,11 @@ ix86_init_mmx_sse_builtins ()
size_t i;
tree pchar_type_node = build_pointer_type (char_type_node);
+ tree pcchar_type_node = build_pointer_type (
+ build_type_variant (char_type_node, 1, 0));
tree pfloat_type_node = build_pointer_type (float_type_node);
+ tree pcfloat_type_node = build_pointer_type (
+ build_type_variant (float_type_node, 1, 0));
tree pv2si_type_node = build_pointer_type (V2SI_type_node);
tree pv2di_type_node = build_pointer_type (V2DI_type_node);
tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
@@ -12213,11 +12421,18 @@ ix86_init_mmx_sse_builtins ()
tree int_ftype_v4sf
= build_function_type_list (integer_type_node,
V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
tree int_ftype_v8qi
= build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
tree v4sf_ftype_v4sf_int
= build_function_type_list (V4SF_type_node,
V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2si
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2SI_type_node, NULL_TREE);
@@ -12270,8 +12485,8 @@ ix86_init_mmx_sse_builtins ()
= build_function_type_list (void_type_node,
V8QI_type_node, V8QI_type_node,
pchar_type_node, NULL_TREE);
- tree v4sf_ftype_pfloat
- = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
+ tree v4sf_ftype_pcfloat
+ = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type_list (V4SF_type_node,
@@ -12326,7 +12541,11 @@ ix86_init_mmx_sse_builtins ()
= build_function_type_list (V2SI_type_node,
V2SF_type_node, V2SF_type_node, NULL_TREE);
tree pint_type_node = build_pointer_type (integer_type_node);
+ tree pcint_type_node = build_pointer_type (
+ build_type_variant (integer_type_node, 1, 0));
tree pdouble_type_node = build_pointer_type (double_type_node);
+ tree pcdouble_type_node = build_pointer_type (
+ build_type_variant (double_type_node, 1, 0));
tree int_ftype_v2df_v2df
= build_function_type_list (integer_type_node,
V2DF_type_node, V2DF_type_node, NULL_TREE);
@@ -12338,8 +12557,8 @@ ix86_init_mmx_sse_builtins ()
tree ti_ftype_ti_ti
= build_function_type_list (intTI_type_node,
intTI_type_node, intTI_type_node, NULL_TREE);
- tree void_ftype_pvoid
- = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+ tree void_ftype_pcvoid
+ = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
tree v2di_ftype_di
= build_function_type_list (V2DI_type_node,
long_long_unsigned_type_node, NULL_TREE);
@@ -12364,9 +12583,16 @@ ix86_init_mmx_sse_builtins ()
= build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
tree int_ftype_v2df
= build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
tree v2df_ftype_v2df_int
= build_function_type_list (V2DF_type_node,
V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2df
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2DF_type_node, NULL_TREE);
@@ -12394,8 +12620,8 @@ ix86_init_mmx_sse_builtins ()
= build_function_type_list (void_type_node,
V16QI_type_node, V16QI_type_node,
pchar_type_node, NULL_TREE);
- tree v2df_ftype_pdouble
- = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
+ tree v2df_ftype_pcdouble
+ = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
tree v2df_ftype_v2df_v2df
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DF_type_node, NULL_TREE);
@@ -12454,16 +12680,16 @@ ix86_init_mmx_sse_builtins ()
V16QI_type_node, V16QI_type_node, NULL_TREE);
tree int_ftype_v16qi
= build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
- tree v16qi_ftype_pchar
- = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
+ tree v16qi_ftype_pcchar
+ = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
tree void_ftype_pchar_v16qi
= build_function_type_list (void_type_node,
pchar_type_node, V16QI_type_node, NULL_TREE);
- tree v4si_ftype_pchar
- = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
- tree void_ftype_pchar_v4si
+ tree v4si_ftype_pcint
+ = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
+ tree void_ftype_pcint_v4si
= build_function_type_list (void_type_node,
- pchar_type_node, V4SI_type_node, NULL_TREE);
+ pcint_type_node, V4SI_type_node, NULL_TREE);
tree v2di_ftype_v2di
= build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
@@ -12539,8 +12765,6 @@ ix86_init_mmx_sse_builtins ()
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
- def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
@@ -12566,21 +12790,26 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
+ def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+ def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
@@ -12648,9 +12877,9 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
@@ -12689,33 +12918,36 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
+ def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
- def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
- def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
- def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
+ def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
@@ -12795,6 +13027,13 @@ ix86_expand_binop_builtin (icode, arglist, target)
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
+ if (GET_MODE (op1) == SImode && mode1 == TImode)
+ {
+ rtx x = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadd (x, op1));
+ op1 = gen_lowpart (TImode, x);
+ }
+
/* In case the insn wants input operands in modes different from
the result, abort. */
if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
@@ -12837,9 +13076,7 @@ ix86_expand_store_builtin (icode, arglist)
op1 = safe_vector_operand (op1, mode1);
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
+ op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op1);
if (pat)
@@ -13828,9 +14065,10 @@ ix86_hard_regno_mode_ok (regno, mode)
if (FP_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (SSE_REGNO_P (regno))
- return VALID_SSE_REG_MODE (mode);
+ return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
if (MMX_REGNO_P (regno))
- return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
+ return (TARGET_MMX
+ ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
/* We handle both integer and floats in the general purpose registers.
In future we should be able to handle vector modes as well. */
if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
@@ -14299,7 +14537,7 @@ x86_function_profiler (file, labelno)
else
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
+ fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
PROFILE_COUNT_REGISTER);
#endif
fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
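The x86_function_profiler change is a format-string fix: the old "%%$s"
printed a literal "$s" and never consumed the PROFILE_COUNT_REGISTER
argument, while "%%%s" substitutes it after a literal "%". Assuming an
LPREFIX of ".L", label number 0 and a count register of "edx" (all
target-dependent), the corrected profiling stub reads:

    	movl	$.LP0,%edx
    	call	mcount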
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 493a2b5bf9c..ffca44fd57b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -487,9 +487,10 @@ extern int x86_prefetch_sse;
if (TARGET_64BIT) \
{ \
builtin_assert ("cpu=x86_64"); \
- builtin_assert ("machine=x86_64"); \
+ builtin_define ("__amd64"); \
+ builtin_define ("__amd64__"); \
builtin_define ("__x86_64"); \
builtin_define ("__x86_64__"); \
} \
else \
{ \
@@ -1047,7 +1050,7 @@ do { \
&& (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \
|| ((MODE1) == DImode && TARGET_64BIT)) \
&& ((MODE2) == HImode || (MODE2) == SImode \
- || ((MODE1) == QImode \
+ || ((MODE2) == QImode \
&& (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \
|| ((MODE2) == DImode && TARGET_64BIT))))
@@ -1522,6 +1525,20 @@ enum reg_class
|| ((CLASS) == SIREG) \
|| ((CLASS) == DIREG))
+/* Return a class of registers that cannot change FROM mode to TO mode.
+
+ x87 registers can't do subregs at all, as every value is reformatted
+ to extended precision.  XMM registers do not support the nonzero
+ offsets (4, 8 and 12) that are otherwise valid for integer registers.
+ Since we can't determine these, prohibit all nonparadoxical subregs
+ that change size. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ (GET_MODE_SIZE (TO) < GET_MODE_SIZE (FROM) \
+ ? reg_classes_intersect_p (FLOAT_SSE_REGS, (CLASS)) \
+ || MAYBE_MMX_CLASS_P (CLASS) \
+ : GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
+ ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0)
+
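CANNOT_CHANGE_MODE_CLASS above rejects mode-changing subregs on register
classes where the hardware has no cheap equivalent. A hedged C
illustration of the x87 half of the rule (the union is only an example):

    union pun { double d; unsigned int w[2]; };
    /* After u.d = x, reading u.w[0] cannot be done as a narrowing subreg
       of an x87 register: the 387 keeps every value in 80-bit extended
       format, so the value has to travel through memory instead.  */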
/* A C statement that adds to CLOBBERS any hard regs the port wishes
to automatically clobber for all asms.
@@ -1716,17 +1733,28 @@ typedef struct ix86_args {
#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) 0
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) \
+ function_arg_pass_by_reference (&CUM, MODE, TYPE, NAMED)
+
/* If PIC, we cannot make sibling calls to global functions
because the PLT requires %ebx live.
- If we are returning floats on the register stack, we cannot make
- sibling calls to functions that return floats. (The stack adjust
- instruction will wind up after the sibcall jump, and not be executed.) */
+ If we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does or, conversely, from a function that does return
+ a float to a function that doesn't; the necessary stack adjustment
+ would not be executed. */
#define FUNCTION_OK_FOR_SIBCALL(DECL) \
((DECL) \
&& (! flag_pic || ! TREE_PUBLIC (DECL)) \
&& (! TARGET_FLOAT_RETURNS_IN_80387 \
- || ! FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \
- || FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl))))))
+ || (FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \
+ == FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl)))))))
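The sharpened FUNCTION_OK_FOR_SIBCALL condition requires caller and
callee to agree on whether the return value lives on the 80387 stack, in
either direction. A hedged C illustration (function names invented):

    double g (void);
    double f (void) { return g (); }  /* float == float: sibcall is OK  */
    void   h (void) { g (); }         /* mismatch: no sibcall, as g's
                                         result left on the 387 stack
                                         would never be popped           */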
/* Perform any needed actions needed for a function that is receiving a
variable number of arguments.
@@ -2068,9 +2096,12 @@ enum ix86_builtins
IX86_BUILTIN_CVTPI2PS,
IX86_BUILTIN_CVTPS2PI,
IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
IX86_BUILTIN_CVTTPS2PI,
IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
IX86_BUILTIN_MAXPS,
IX86_BUILTIN_MAXSS,
@@ -2116,6 +2147,7 @@ enum ix86_builtins
IX86_BUILTIN_PADDB,
IX86_BUILTIN_PADDW,
IX86_BUILTIN_PADDD,
+ IX86_BUILTIN_PADDQ,
IX86_BUILTIN_PADDSB,
IX86_BUILTIN_PADDSW,
IX86_BUILTIN_PADDUSB,
@@ -2123,6 +2155,7 @@ enum ix86_builtins
IX86_BUILTIN_PSUBB,
IX86_BUILTIN_PSUBW,
IX86_BUILTIN_PSUBD,
+ IX86_BUILTIN_PSUBQ,
IX86_BUILTIN_PSUBSB,
IX86_BUILTIN_PSUBSW,
IX86_BUILTIN_PSUBUSB,
@@ -2327,11 +2360,14 @@ enum ix86_builtins
IX86_BUILTIN_CVTPI2PD,
IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
IX86_BUILTIN_CVTSD2SS,
IX86_BUILTIN_CVTSS2SD,
IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
IX86_BUILTIN_CVTPS2DQ,
IX86_BUILTIN_CVTPS2PD,
@@ -3286,6 +3322,7 @@ do { \
{"register_and_not_any_fp_reg_operand", {REG}}, \
{"fp_register_operand", {REG}}, \
{"register_and_not_fp_reg_operand", {REG}}, \
+ {"vector_move_operand", {CONST_VECTOR, SUBREG, REG, MEM}}, \
/* A list of predicates that do special things with modes, and so
should not elicit warnings for VOIDmode match_operand. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index befbfe49569..edbb7163646 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1,5 +1,6 @@
;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
@@ -267,6 +268,8 @@
(define_attr "length" ""
(cond [(eq_attr "type" "other,multi,fistp")
(const_int 16)
+ (eq_attr "type" "fcmp")
+ (const_int 4)
(eq_attr "unit" "i387")
(plus (const_int 2)
(plus (attr "prefix_data16")
@@ -1099,25 +1102,20 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "1")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
- (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm")
+ (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
{
case TYPE_SSEMOV:
- if (get_attr_mode (insn) == TImode)
+ if (get_attr_mode (insn) == MODE_TI)
return "movdqa\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
case TYPE_MMXMOV:
- if (get_attr_mode (insn) == DImode)
+ if (get_attr_mode (insn) == MODE_DI)
return "movq\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
@@ -1131,17 +1129,16 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "4,5,6")
+ (cond [(eq_attr "alternative" "2,3,4")
(const_string "mmxmov")
- (eq_attr "alternative" "7,8,9")
+ (eq_attr "alternative" "5,6,7")
(const_string "ssemov")
(and (ne (symbol_ref "flag_pic") (const_int 0))
(match_operand:SI 1 "symbolic_operand" ""))
(const_string "lea")
]
(const_string "imov")))
- (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
- (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
+ (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")])
;; Stores and loads of ax to arbitary constant address.
;; We fake an second form of instruction to force reload to load address
@@ -1214,14 +1211,9 @@
[(set_attr "type" "push")
(set_attr "mode" "QI")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m")
- (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
@@ -1238,36 +1230,35 @@
}
}
[(set (attr "type")
- (cond [(and (eq_attr "alternative" "0,1")
+ (cond [(and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "imov")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "imov")
(and (ne (symbol_ref "TARGET_MOVX")
(const_int 0))
- (eq_attr "alternative" "0,1,3,4"))
+ (eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "SI")
- (and (eq_attr "alternative" "0,1")
+ (and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "SI")
]
- (const_string "HI")))
- (set_attr "modrm" "0,*,*,0,*,*")])
+ (const_string "HI")))])
;; Stores and loads of ax to arbitary constant address.
;; We fake an second form of instruction to force reload to load address
@@ -1488,7 +1479,7 @@
(define_expand "movstrictqi"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
(match_operand:QI 1 "general_operand" ""))]
- "! TARGET_PARTIAL_REG_STALL"
+ "! TARGET_PARTIAL_REG_STALL || optimize_size"
{
/* Don't generate memory->memory moves, go through a register. */
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
@@ -1498,7 +1489,7 @@
(define_insn "*movstrictqi_1"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
(match_operand:QI 1 "general_operand" "*qn,m"))]
- "! TARGET_PARTIAL_REG_STALL
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"mov{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "imov")
@@ -12839,9 +12830,9 @@
(set_attr "modrm" "0")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 6)))])
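The same bounds change recurs in the three length attributes below. The
patch gives no rationale, but one consistent reading (an assumption, not
taken from the source): the short form of these jumps is 2 bytes, and its
8-bit displacement is counted from the end of the instruction, while
(minus (match_dup 0) (pc)) above measures from the start, so:

    disp                    in [-128, 127]   ; 8-bit pc-relative field
    target - insn_start  =  disp + 2         ; short form is 2 bytes
    target - insn_start     in [-126, 129]

Under that reading the old [-128, 124) test was unsafe at the bottom
(targets needing disp -129 or -130 are not encodable) and needlessly
tight at the top; the new [-126, 128) window stays inside the encodable
range.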
@@ -12857,9 +12848,9 @@
(set_attr "modrm" "0")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 6)))])
@@ -13124,9 +13115,9 @@
[(set_attr "type" "ibr")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 5)))
(set_attr "modrm" "0")])
@@ -13250,9 +13241,9 @@
(set (attr "length")
(if_then_else (and (eq_attr "alternative" "0")
(and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124))))
+ (const_int 128))))
(const_int 2)
(const_int 16)))
;; We don't know the type before shorten branches. Optimistically expect
@@ -13616,11 +13607,10 @@
"ix86_expand_epilogue (0); DONE;")
(define_expand "eh_return"
- [(use (match_operand 0 "register_operand" ""))
- (use (match_operand 1 "register_operand" ""))]
+ [(use (match_operand 0 "register_operand" ""))]
""
{
- rtx tmp, sa = operands[0], ra = operands[1];
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
/* Tricky bit: we write the address of the handler to which we will
be returning into someone else's stack frame, one word below the
@@ -13682,7 +13672,7 @@
(define_expand "ffssi2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
- (ffs:SI (match_operand:SI 1 "general_operand" "")))]
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
""
{
rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
@@ -14823,7 +14813,7 @@
(define_insn "cosxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
[(set_attr "type" "fpspc")
@@ -16734,7 +16724,7 @@
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "comparison_operator"
- [(match_operand 4 "register_operand" "")
+ [(match_operand 4 "nonimmediate_operand" "")
(match_operand 5 "nonimmediate_operand" "")])
(match_operand 2 "nonmemory_operand" "")
(match_operand 3 "nonmemory_operand" "")))]
@@ -16746,13 +16736,16 @@
(subreg:TI (match_dup 7) 0)))]
{
PUT_MODE (operands[1], GET_MODE (operands[0]));
- if (!sse_comparison_operator (operands[1], VOIDmode))
+ if (!sse_comparison_operator (operands[1], VOIDmode)
+ || !rtx_equal_p (operands[0], operands[4]))
{
rtx tmp = operands[5];
operands[5] = operands[4];
operands[4] = tmp;
PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
}
+ if (!rtx_equal_p (operands[0], operands[4]))
+ abort ();
if (const0_operand (operands[2], GET_MODE (operands[0])))
{
operands[7] = operands[3];
@@ -16853,6 +16846,10 @@
operands[2] = gen_lowpart (SImode, operands[2]);
PUT_MODE (operands[3], SImode);")
+; Promote the QImode tests, as the i386 has an encoding of the AND
+; instruction with a 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
(define_split
[(set (reg 17)
(compare (and (match_operand 1 "aligned_operand" "")
@@ -16861,12 +16858,11 @@
(set (match_operand 0 "register_operand" "")
(and (match_dup 1) (match_dup 2)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && (GET_MODE (operands[0]) == HImode
- || (GET_MODE (operands[0]) == QImode
- /* Ensure that the operand will remain sign extended immediate. */
- && INTVAL (operands[2]) >= 0
- && (TARGET_PROMOTE_QImode || optimize_size)))"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)
+ && ! optimize_size
+ && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
[(parallel [(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 1) (match_dup 2))
(const_int 0)))
@@ -16879,17 +16875,20 @@
operands[0] = gen_lowpart (SImode, operands[0]);
operands[1] = gen_lowpart (SImode, operands[1]);")
-; Don't promote the QImode tests, as i386 don't have encoding of
-; the test instruction with 32bit sign extended immediate and thus
-; the code grows.
+; Don't promote the QImode tests, as the i386 doesn't have an encoding
+; of the TEST instruction with a 32-bit sign-extended immediate; thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
(define_split
[(set (reg 17)
(compare (and (match_operand:HI 0 "aligned_operand" "")
(match_operand:HI 1 "const_int_operand" ""))
(const_int 0)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && GET_MODE (operands[0]) == HImode"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)
+ && ! TARGET_FAST_PREFIX
+ && ! optimize_size"
[(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 0) (match_dup 1))
(const_int 0)))]
@@ -17848,67 +17847,92 @@
;; Moves for SSE/MMX regs.
(define_insn "movv4sf_internal"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv4si_internal"
- [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv2di_internal"
- [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movdqa\t{%1, %0|%0, %1}"
+ "@
+ pxor\t%0, %0
+ movdqa\t{%1, %0|%0, %1}
+ movdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv8qi_internal"
- [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxmov")
(set_attr "mode" "DI")])
(define_insn "movv4hi_internal"
- [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxmov")
(set_attr "mode" "DI")])
(define_insn "movv2si_internal"
- [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
(define_insn "movv2sf_internal"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_3DNOW"
- "movq\\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_3DNOW
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
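The "C" constraint alternatives added to these move patterns let a vector
zero be generated in the register itself rather than loaded from the
constant pool (see the ix86_expand_vector_move change earlier in the
patch). A sketch of the difference, with illustrative register choices:

    ; before:  movaps  .LC0, %xmm0       ; 16 bytes of zeros from memory
    ; after:   xorps   %xmm0, %xmm0      ; SSE zero
    ;          pxor    %mm0, %mm0        ; MMX equivalent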
(define_expand "movti"
- [(set (match_operand:TI 0 "general_operand" "")
- (match_operand:TI 1 "general_operand" ""))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
@@ -17919,35 +17943,44 @@
})
(define_insn "movv2df_internal"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movapd\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorpd\t%0, %0
+ movapd\t{%1, %0|%0, %1}
+ movapd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "movv8hi_internal"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv16qi_internal"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_expand "movv2df"
- [(set (match_operand:V2DF 0 "general_operand" "")
- (match_operand:V2DF 1 "general_operand" ""))]
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V2DFmode, operands);
@@ -17955,8 +17988,8 @@
})
(define_expand "movv8hi"
- [(set (match_operand:V8HI 0 "general_operand" "")
- (match_operand:V8HI 1 "general_operand" ""))]
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V8HImode, operands);
@@ -17964,8 +17997,8 @@
})
(define_expand "movv16qi"
- [(set (match_operand:V16QI 0 "general_operand" "")
- (match_operand:V16QI 1 "general_operand" ""))]
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
+ (match_operand:V16QI 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V16QImode, operands);
@@ -17973,8 +18006,8 @@
})
(define_expand "movv4sf"
- [(set (match_operand:V4SF 0 "general_operand" "")
- (match_operand:V4SF 1 "general_operand" ""))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V4SFmode, operands);
@@ -17982,8 +18015,8 @@
})
(define_expand "movv4si"
- [(set (match_operand:V4SI 0 "general_operand" "")
- (match_operand:V4SI 1 "general_operand" ""))]
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V4SImode, operands);
@@ -17991,8 +18024,8 @@
})
(define_expand "movv2di"
- [(set (match_operand:V2DI 0 "general_operand" "")
- (match_operand:V2DI 1 "general_operand" ""))]
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "")
+ (match_operand:V2DI 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V2DImode, operands);
@@ -18000,8 +18033,8 @@
})
(define_expand "movv2si"
- [(set (match_operand:V2SI 0 "general_operand" "")
- (match_operand:V2SI 1 "general_operand" ""))]
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+ (match_operand:V2SI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V2SImode, operands);
@@ -18009,8 +18042,8 @@
})
(define_expand "movv4hi"
- [(set (match_operand:V4HI 0 "general_operand" "")
- (match_operand:V4HI 1 "general_operand" ""))]
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V4HImode, operands);
@@ -18018,8 +18051,8 @@
})
(define_expand "movv8qi"
- [(set (match_operand:V8QI 0 "general_operand" "")
- (match_operand:V8QI 1 "general_operand" ""))]
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
+ (match_operand:V8QI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V8QImode, operands);
@@ -18027,14 +18060,97 @@
})
(define_expand "movv2sf"
- [(set (match_operand:V2SF 0 "general_operand" "")
- (match_operand:V2SF 1 "general_operand" ""))]
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
"TARGET_3DNOW"
{
ix86_expand_vector_move (V2SFmode, operands);
DONE;
})
+(define_insn "*pushv2df"
+ [(set (match_operand:V2DF 0 "push_operand" "=<")
+ (match_operand:V2DF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv2di"
+ [(set (match_operand:V2DI 0 "push_operand" "=<")
+ (match_operand:V2DI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv8hi"
+ [(set (match_operand:V8HI 0 "push_operand" "=<")
+ (match_operand:V8HI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv16qi"
+ [(set (match_operand:V16QI 0 "push_operand" "=<")
+ (match_operand:V16QI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv4sf"
+ [(set (match_operand:V4SF 0 "push_operand" "=<")
+ (match_operand:V4SF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv4si"
+ [(set (match_operand:V4SI 0 "push_operand" "=<")
+ (match_operand:V4SI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv2si"
+ [(set (match_operand:V2SI 0 "push_operand" "=<")
+ (match_operand:V2SI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv4hi"
+ [(set (match_operand:V4HI 0 "push_operand" "=<")
+ (match_operand:V4HI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv8qi"
+ [(set (match_operand:V8QI 0 "push_operand" "=<")
+ (match_operand:V8QI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv2sf"
+ [(set (match_operand:V2SF 0 "push_operand" "=<")
+ (match_operand:V2SF 1 "register_operand" "y"))]
+ "TARGET_3DNOW"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+
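
There is no hardware push for XMM/MMX registers, so these "#" patterns exist only to be split after reload into an explicit stack-pointer adjustment (reg 7 is the stack pointer in this backend's numbering) followed by an ordinary vector store. A hedged sketch of a call site that can exercise the split; the callee name is hypothetical:

    #include <xmmintrin.h>

    void callee (__m128);       /* hypothetical */

    void caller (__m128 v)
    {
      /* Passing v on the stack becomes, roughly:
         sub $16, %esp; movaps %xmm0, (%esp).  */
      callee (v);
    }
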
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -18158,8 +18274,9 @@
(define_insn "movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:TI 1 "general_operand" "C,xm,x"))]
- "TARGET_SSE && !TARGET_64BIT"
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
@@ -18169,7 +18286,7 @@
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
@@ -18191,29 +18308,56 @@
;; These two patterns are useful for specifying exactly whether to use
;; movaps or movups
-(define_insn "sse_movaps"
+(define_expand "sse_movaps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVA))]
+ "TARGET_SSE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movaps (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
+
+(define_insn "*sse_movaps_1"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE"
- "@
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov,ssemov")
(set_attr "mode" "V4SF")])
-(define_insn "sse_movups"
+(define_expand "sse_movups"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movups (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
+
+(define_insn "*sse_movups_1"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE"
- "@
- movups\t{%1, %0|%0, %1}
- movups\t{%1, %0|%0, %1}"
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movups\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt,ssecvt")
(set_attr "mode" "V4SF")])
-
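
Turning the named movaps/movups patterns into expanders legalizes a memory-to-memory request by routing it through a fresh register, the only shape the hardware supports. In source terms that is simply an unaligned copy; a short sketch:

    #include <xmmintrin.h>

    /* movups load into a register, then movups store.  */
    void copy4f (float *dst, const float *src)
    {
      _mm_storeu_ps (dst, _mm_loadu_ps (src));
    }
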
;; SSE Strange Moves.
(define_insn "sse_movmskps"
@@ -18329,11 +18473,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
@@ -18804,7 +18958,7 @@
(define_insn "sse2_nandv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
+ (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
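
Dropping the "%" marker is a correctness fix: pandn computes (~op1) & op2, so its operands must not be commuted. For example:

    #include <xmmintrin.h>

    /* Computes val & ~mask; swapping the arguments changes the result.  */
    __m128i clear_masked (__m128i mask, __m128i val)
    {
      return _mm_andnot_si128 (mask, val);
    }
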
@@ -18908,7 +19062,7 @@
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")])
- (match_dup 1)
+ (subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
"cmp%D3ss\t{%2, %0|%0, %2}"
@@ -19093,6 +19247,19 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -19103,6 +19270,17 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
+
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -19114,6 +19292,18 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+ UNSPEC_FIX)
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "vector,vector")])
+
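
These cvt*siq/cvtsi2ssq patterns back the 64-bit scalar conversions exported from xmmintrin.h later in this patch (_mm_cvtss_si64x and friends; later GCC releases spell them _mm_cvtss_si64 etc.). A 64-bit-only sketch:

    #ifdef __x86_64__
    #include <xmmintrin.h>

    long long to_ll_rounded (__m128 a)   { return _mm_cvtss_si64x (a); }   /* cvtss2siq  */
    long long to_ll_truncated (__m128 a) { return _mm_cvttss_si64x (a); }  /* cvttss2siq */
    #endif
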
;; MMX insns
@@ -19121,7 +19311,7 @@
(define_insn "addv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddb\t{%2, %0|%0, %2}"
@@ -19130,7 +19320,7 @@
(define_insn "addv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddw\t{%2, %0|%0, %2}"
@@ -19139,16 +19329,27 @@
(define_insn "addv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+ (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddd\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
+(define_insn "mmx_adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(plus:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "paddq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
(define_insn "ssaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsb\t{%2, %0|%0, %2}"
@@ -19157,7 +19358,7 @@
(define_insn "ssaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsw\t{%2, %0|%0, %2}"
@@ -19166,7 +19367,7 @@
(define_insn "usaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusb\t{%2, %0|%0, %2}"
@@ -19175,7 +19376,7 @@
(define_insn "usaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusw\t{%2, %0|%0, %2}"
@@ -19209,6 +19410,17 @@
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
+(define_insn "mmx_subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "psubq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
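
paddq/psubq on MMX registers are wrapped in an UNSPEC so that DImode arithmetic in MMX registers does not collide with the ordinary integer add/sub patterns. They surface as the _mm_add_si64/_mm_sub_si64 intrinsics added to mmintrin.h below (the opcodes themselves require SSE2-class hardware):

    #include <mmintrin.h>

    __m64 add64 (__m64 a, __m64 b) { return _mm_add_si64 (a, b); }  /* paddq */
    __m64 sub64 (__m64 a, __m64 b) { return _mm_sub_si64 (a, b); }  /* psubq */
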
(define_insn "sssubv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
@@ -19312,7 +19524,7 @@
(define_insn "mmx_iordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(ior:DI (match_operand:DI 1 "register_operand" "0")
+ [(ior:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19323,7 +19535,7 @@
(define_insn "mmx_xordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(xor:DI (match_operand:DI 1 "register_operand" "0")
+ [(xor:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19346,7 +19558,7 @@
(define_insn "mmx_anddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(and:DI (match_operand:DI 1 "register_operand" "0")
+ [(and:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19805,17 +20017,17 @@
(define_insn "ldmxcsr"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
UNSPECV_LDMXCSR)]
- "TARGET_MMX"
+ "TARGET_SSE"
"ldmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "load")])
(define_insn "stmxcsr"
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
- "TARGET_MMX"
+ "TARGET_SSE"
"stmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "store")])
(define_expand "sfence"
@@ -20471,7 +20683,7 @@
(match_operator:V2DI 3 "sse_comparison_operator"
[(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "x")])
- (match_dup 1)
+ (subreg:V2DI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE2"
"cmp%D3sd\t{%2, %0|%0, %2}"
@@ -20692,6 +20904,15 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SI")])
+(define_insn "cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SI")])
+
(define_insn "cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
@@ -20701,6 +20922,16 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SI")])
+(define_insn "cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+ (parallel [(const_int 0)]))] UNSPEC_FIX))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector,vector")])
+
(define_insn "cvtsi2sd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
@@ -20713,6 +20944,19 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+(define_insn "cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+ (vec_duplicate:V2DF
+ (float:DF
+ (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 2)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "vector,direct")])
+
;; Conversions between SF and DF
(define_insn "cvtsd2ss"
@@ -20770,7 +21014,7 @@
(define_insn "addv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddb\t{%2, %0|%0, %2}"
@@ -20779,7 +21023,7 @@
(define_insn "addv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddw\t{%2, %0|%0, %2}"
@@ -20788,7 +21032,7 @@
(define_insn "addv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
- (plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
(match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddd\t{%2, %0|%0, %2}"
@@ -20797,7 +21041,7 @@
(define_insn "addv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddq\t{%2, %0|%0, %2}"
@@ -20806,7 +21050,7 @@
(define_insn "ssaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsb\t{%2, %0|%0, %2}"
@@ -20815,7 +21059,7 @@
(define_insn "ssaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsw\t{%2, %0|%0, %2}"
@@ -20824,7 +21068,7 @@
(define_insn "usaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusb\t{%2, %0|%0, %2}"
@@ -20833,7 +21077,7 @@
(define_insn "usaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusw\t{%2, %0|%0, %2}"
@@ -21069,7 +21313,8 @@
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
(vec_duplicate:V8HI
- (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (truncate:HI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
(match_operand:SI 3 "immediate_operand" "i")))]
"TARGET_SSE2"
"pinsrw\t{%3, %2, %0|%0, %2, %3}"
@@ -21218,7 +21463,7 @@
(define_insn "ashrv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psraw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21227,7 +21472,7 @@
(define_insn "ashrv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrad\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21236,7 +21481,7 @@
(define_insn "lshrv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrlw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21245,7 +21490,7 @@
(define_insn "lshrv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrld\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21254,7 +21499,7 @@
(define_insn "lshrv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrlq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21263,7 +21508,7 @@
(define_insn "ashlv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psllw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21272,7 +21517,7 @@
(define_insn "ashlv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"pslld\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21281,7 +21526,7 @@
(define_insn "ashlv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psllq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
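
The shift counts change from SImode to TImode because the hardware takes the count either as an immediate or from the low quadword of an XMM register; the two forms correspond to the two intrinsic flavours:

    #include <xmmintrin.h>

    __m128i by_imm (__m128i v)            { return _mm_slli_epi64 (v, 3); }
    __m128i by_reg (__m128i v, __m128i c) { return _mm_sll_epi64 (v, c); }
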
@@ -21595,45 +21840,41 @@
(define_insn "sse2_movapd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE2"
- "@
- movapd\t{%1, %0|%0, %1}
- movapd\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movapd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "sse2_movupd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE2"
- "@
- movupd\t{%1, %0|%0, %1}
- movupd\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movupd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
(define_insn "sse2_movdqa"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE2"
- "@
- movdqa\t{%1, %0|%0, %1}
- movdqa\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "TI")])
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE2"
- "@
- movdqu\t{%1, %0|%0, %1}
- movdqu\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
@@ -21641,24 +21882,48 @@
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
(parallel [(const_int 0)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
+(define_insn "sse2_movdq2q_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+ (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_movq2dq"
[(set (match_operand:V2DI 0 "register_operand" "=x,?x")
(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
(const_int 0)))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt,ssemov")
(set_attr "mode" "TI")])
+(define_insn "sse2_movq2dq_rex64"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+ (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+ (const_int 0)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssemov,ssecvt")
+ (set_attr "mode" "TI")])
+
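
On x86-64 the low quadword can also travel through a general register via movd, hence the extra "r" alternatives in the _rex64 variants. The portable route is the _mm_movepi64_pi64 intrinsic added to xmmintrin.h below:

    #include <xmmintrin.h>

    /* Maps onto the movdq2q alternative.  */
    __m64 low_quad (__m128i v)
    {
      return _mm_movepi64_pi64 (v);
    }
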
(define_insn "sse2_movq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI (vec_select:DI
@@ -21673,7 +21938,7 @@
(define_insn "sse2_loadd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
- (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr"))
+ (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
(const_vector:V4SI [(const_int 0)
(const_int 0)
(const_int 0)
@@ -21716,11 +21981,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (match_operand:DF 1 "memory_operand" "m")
- (vec_duplicate:DF (float:DF (const_int 0)))
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE2"
"movsd\t{%1, %0|%0, %1}"
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index ae346e6d518..7a9e0ba989f 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -1,5 +1,5 @@
/* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format.
- Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h.
This file is part of GNU CC.
@@ -36,6 +36,11 @@ Boston, MA 02111-1307, USA. */
builtin_define ("__PIC__"); \
builtin_define ("__pic__"); \
} \
+ if (TARGET_64BIT) \
+ { \
+ builtin_define ("__LP64__"); \
+ builtin_define ("_LP64"); \
+ } \
} \
while (0)
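
With __LP64__/_LP64 defined for 64-bit compiles, source code can test the data model instead of the CPU. A sketch (the typedef name is illustrative only):

    #ifdef __LP64__
    typedef long word_t;    /* longs and pointers are 64 bits */
    #else
    typedef int word_t;
    #endif
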
@@ -116,17 +121,17 @@ Boston, MA 02111-1307, USA. */
(FS)->regs.reg[0].how = REG_SAVED_OFFSET; \
(FS)->regs.reg[0].loc.offset = (long)&sc_->rax - new_cfa_; \
(FS)->regs.reg[1].how = REG_SAVED_OFFSET; \
- (FS)->regs.reg[1].loc.offset = (long)&sc_->rbx - new_cfa_; \
+ (FS)->regs.reg[1].loc.offset = (long)&sc_->rdx - new_cfa_; \
(FS)->regs.reg[2].how = REG_SAVED_OFFSET; \
(FS)->regs.reg[2].loc.offset = (long)&sc_->rcx - new_cfa_; \
(FS)->regs.reg[3].how = REG_SAVED_OFFSET; \
- (FS)->regs.reg[3].loc.offset = (long)&sc_->rdx - new_cfa_; \
+ (FS)->regs.reg[3].loc.offset = (long)&sc_->rbx - new_cfa_; \
(FS)->regs.reg[4].how = REG_SAVED_OFFSET; \
- (FS)->regs.reg[4].loc.offset = (long)&sc_->rbp - new_cfa_; \
+ (FS)->regs.reg[4].loc.offset = (long)&sc_->rsi - new_cfa_; \
(FS)->regs.reg[5].how = REG_SAVED_OFFSET; \
- (FS)->regs.reg[5].loc.offset = (long)&sc_->rsi - new_cfa_; \
+ (FS)->regs.reg[5].loc.offset = (long)&sc_->rdi - new_cfa_; \
(FS)->regs.reg[6].how = REG_SAVED_OFFSET; \
- (FS)->regs.reg[6].loc.offset = (long)&sc_->rdi - new_cfa_; \
+ (FS)->regs.reg[6].loc.offset = (long)&sc_->rbp - new_cfa_; \
(FS)->regs.reg[8].how = REG_SAVED_OFFSET; \
(FS)->regs.reg[8].loc.offset = (long)&sc_->r8 - new_cfa_; \
(FS)->regs.reg[9].how = REG_SAVED_OFFSET; \
@@ -143,6 +148,8 @@ Boston, MA 02111-1307, USA. */
(FS)->regs.reg[14].loc.offset = (long)&sc_->r14 - new_cfa_; \
(FS)->regs.reg[15].how = REG_SAVED_OFFSET; \
(FS)->regs.reg[15].loc.offset = (long)&sc_->r15 - new_cfa_; \
+ (FS)->regs.reg[16].how = REG_SAVED_OFFSET; \
+ (FS)->regs.reg[16].loc.offset = (long)&sc_->rip - new_cfa_; \
(FS)->retaddr_column = 16; \
goto SUCCESS; \
} while (0)
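
The reshuffled offsets follow the x86-64 DWARF register numbering (rdx is column 1, rcx 2, rbx 3, rsi 4, rdi 5, rbp 6), and column 16 (%rip, the return-address column) is now recorded as well. For reference:

    /* DWARF columns 0..6 in x86-64 order.  */
    static const char *const dwarf_cols[] =
      { "rax", "rdx", "rcx", "rbx", "rsi", "rdi", "rbp" };
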
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index e7c5e8b6bcc..7f62fbd5624 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -1,6 +1,7 @@
/* Operating system specific defines to be used when targeting GCC for
hosting on Windows32, using GNU tools and the Windows32 API Library.
- Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+ Free Software Foundation, Inc.
This file is part of GNU CC.
@@ -89,7 +90,7 @@ Boston, MA 02111-1307, USA. */
/* Include in the mingw32 libraries with libgcc */
#undef LIBGCC_SPEC
#define LIBGCC_SPEC \
- "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmsvcrt"
+ "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmingwex -lmsvcrt"
#undef STARTFILE_SPEC
#define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 52e5195fbaf..7b4aa014645 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -56,6 +56,22 @@ _mm_cvtsi32_si64 (int __i)
return (__m64) __tmp;
}
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+static __inline __m64
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Convert I to a __m64 object. */
+static __inline __m64
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
/* Convert the lower 32 bits of the __m64 object into an integer. */
static __inline int
_mm_cvtsi64_si32 (__m64 __i)
@@ -64,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i)
return __tmp;
}
+#ifdef __x86_64__
+/* Convert the __m64 object to a 64-bit integer.  */
+static __inline long long
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with signed saturation. */
@@ -160,6 +185,13 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2)
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}
+/* Add the 64-bit values in M1 to the 64-bit values in M2. */
+static __inline __m64
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
+}
+
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
static __inline __m64
@@ -213,6 +245,13 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2)
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}
+/* Subtract the 64-bit values in M2 from the 64-bit values in M1.  */
+static __inline __m64
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
+}
+
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
static __inline __m64
diff --git a/gcc/config/i386/scodbx.h b/gcc/config/i386/scodbx.h
deleted file mode 100644
index 7da93053256..00000000000
--- a/gcc/config/i386/scodbx.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Definitions for Intel 386 running SCO Unix System V,
- using dbx-in-coff encapsulation.
- Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#include "i386/svr3dbx.h"
-
-/* Overridden defines for SCO systems from sco.h. */
-
-/* By default, target has a 80387, uses IEEE compatible arithmetic,
- and returns float values in the 387, ie,
- (TARGET_80387 | TARGET_FLOAT_RETURNS_IN_80387)
-
- SCO's software emulation of a 387 fails to handle the `fucomp'
- opcode. fucomp is only used when generating IEEE compliant code.
- So don't make TARGET_IEEE_FP default for SCO. */
-
-#undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_FLOAT_RETURNS)
-
-/* Use crt1.o as a startup file and crtn.o as a closing file. */
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
- "%{!r:%{!z:svr3.ifile%s}%{z:svr3z.ifile%s}}\
- %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}"
-
-/* Library spec, including SCO international language support. */
-
-#undef LIB_SPEC
-#define LIB_SPEC \
- "%{p:-L/usr/lib/libp}%{pg:-L/usr/lib/libp} %{scointl:libintl.a%s} -lc"
-
-/* Specify predefined symbols in preprocessor. */
-
-#undef CPP_PREDEFINES
-#define CPP_PREDEFINES "-Dunix -DM_UNIX -DM_I386 -DM_COFF -DM_WORDSWAP -Asystem=svr3"
-
-#undef CPP_SPEC
-#define CPP_SPEC "%(cpp_cpu) %{scointl:-DM_INTERNAT}"
-
-/* This spec is used for telling cpp whether char is signed or not. */
-
-#undef SIGNED_CHAR_SPEC
-#if DEFAULT_SIGNED_CHAR
-#define SIGNED_CHAR_SPEC \
- "%{funsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#else
-#define SIGNED_CHAR_SPEC \
- "%{!fsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#endif
-
-/* caller has to pop the extra argument passed to functions that return
- structures. */
-
-#undef RETURN_POPS_ARGS
-#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) \
- ((FUNDECL) && TREE_CODE (FUNDECL) == IDENTIFIER_NODE ? 0 \
- : (TARGET_RTD \
- && (TYPE_ARG_TYPES (FUNTYPE) == 0 \
- || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE))) \
- == void_type_node))) ? (SIZE) \
- : 0)
-/* On other 386 systems, the last line looks like this:
- : (aggregate_value_p (TREE_TYPE (FUNTYPE))) ? GET_MODE_SIZE (Pmode) : 0) */
-
-/* Handle #pragma pack. */
-#define HANDLE_SYSV_PRAGMA
diff --git a/gcc/config/i386/t-sco5gas b/gcc/config/i386/t-sco5gas
index 2d0b48a6292..edeb554eea0 100644
--- a/gcc/config/i386/t-sco5gas
+++ b/gcc/config/i386/t-sco5gas
@@ -1,6 +1,6 @@
# The pushl in CTOR initialization interferes with frame pointer elimination.
CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer
-CRTSTUFF_T_CFLAGS_S = -mcoff -fno-omit-frame-pointer
+CRTSTUFF_T_CFLAGS_S = -fno-omit-frame-pointer
#
# I am still a little unsure of the multilib architecture. The following
diff --git a/gcc/config/i386/xm-dgux.h b/gcc/config/i386/xm-dgux.h
deleted file mode 100644
index 881c5c7be9d..00000000000
--- a/gcc/config/i386/xm-dgux.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* Configuration for GCC for Intel i386 running DG/ux */
-
-/* looks just like sysv4 for now */
-#include "xm-svr4.h"
diff --git a/gcc/config/i386/xm-sun.h b/gcc/config/i386/xm-sun.h
deleted file mode 100644
index 6c0f0a25630..00000000000
--- a/gcc/config/i386/xm-sun.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Configuration for GNU C-compiler for Intel 80386 running SunOS 4.0.
- Copyright (C) 1988, 1997 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
-
-#define USG
diff --git a/gcc/config/i386/xm-sysv3.h b/gcc/config/i386/xm-sysv3.h
deleted file mode 100644
index 9a655443ff5..00000000000
--- a/gcc/config/i386/xm-sysv3.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* Configuration for GCC for Intel i386 running System V Release 3. */
-
-#include "xm-svr3.h"
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 4136e901795..43a05c1a6ee 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A)
return __builtin_ia32_cvtss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 64-bit integer according to the current
+   rounding mode.  */
+static __inline long long
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Convert the two lower SPFP values to 32-bit integers according to the
current rounding mode. Return the integers in packed form. */
static __inline __m64
@@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A)
return __builtin_ia32_cvttss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 64-bit integer.  */
+static __inline long long
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Truncate the two lower SPFP values to 32-bit integers. Return the
integers in packed form. */
static __inline __m64
@@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B)
return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+static __inline __m128
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
/* Convert the two 32-bit values in B to SPFP form and insert them
as the two lower elements in A. */
static __inline __m128
@@ -1586,13 +1614,13 @@ _mm_ucomineq_sd (__m128d __A, __m128d __B)
static __inline __m128i
_mm_load_si128 (__m128i const *__P)
{
- return (__m128i) __builtin_ia32_loaddqa (__P);
+ return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
}
static __inline __m128i
_mm_loadu_si128 (__m128i const *__P)
{
- return (__m128i) __builtin_ia32_loaddqu (__P);
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
}
static __inline __m128i
@@ -1604,13 +1632,13 @@ _mm_loadl_epi64 (__m128i const *__P)
static __inline void
_mm_store_si128 (__m128i *__P, __m128i __B)
{
- __builtin_ia32_storedqa (__P, (__v16qi)__B);
+ __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
}
static __inline void
_mm_storeu_si128 (__m128i *__P, __m128i __B)
{
- __builtin_ia32_storedqu (__P, (__v16qi)__B);
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
}
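
Casting through char * keeps these builtins from implying any alignment on the pointer. Typical use, sketched with this tree's headers (note that here the SSE2 intrinsics still live in xmmintrin.h):

    #include <xmmintrin.h>

    /* 16-byte copy with no alignment assumptions.  */
    void copy16 (void *dst, const void *src)
    {
      _mm_storeu_si128 ((__m128i *) dst,
                        _mm_loadu_si128 ((__m128i const *) src));
    }
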
static __inline void
@@ -1619,6 +1647,12 @@ _mm_storel_epi64 (__m128i *__P, __m128i __B)
*(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
}
+static __inline __m64
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
+}
+
static __inline __m128i
_mm_move_epi64 (__m128i __A)
{
@@ -1656,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
return __u.__v;
}
+
+#ifdef __x86_64__
+/* Create the vector [Z Y]. */
+static __inline __m128i
+_mm_set_epi64x (long long __Z, long long __Y)
+{
+ union {
+ long __a[2];
+ __m128i __v;
+ } __u;
+
+ __u.__a[0] = __Y;
+ __u.__a[1] = __Z;
+
+ return __u.__v;
+}
+#endif
+
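
Note the argument order: the first operand becomes the high half. A 64-bit-only sketch:

    #ifdef __x86_64__
    #include <xmmintrin.h>

    /* Lane 1 = hi, lane 0 = lo.  */
    __m128i pack2 (long long hi, long long lo)
    {
      return _mm_set_epi64x (hi, lo);
    }
    #endif
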
/* Create the vector [S T U V Z Y X W]. */
static __inline __m128i
_mm_set_epi16 (short __Z, short __Y, short __X, short __W,
@@ -1724,6 +1776,15 @@ _mm_set1_epi32 (int __A)
return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_set1_epi64x (long long __A)
+{
+ __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+ return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+}
+#endif
+
static __inline __m128i
_mm_set1_epi16 (short __A)
{
@@ -1893,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A)
return __builtin_ia32_cvtsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline int
_mm_cvttsd_si32 (__m128d __A)
{
return __builtin_ia32_cvttsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline __m128
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
@@ -1911,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}
+#ifdef __x86_64__
+static __inline __m128d
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
static __inline __m128d
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
@@ -2048,7 +2133,7 @@ _mm_add_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_add_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_paddq128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@@ -2096,7 +2181,7 @@ _mm_sub_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_psubq128 ((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@@ -2142,7 +2227,7 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B)
}
static __inline __m64
-_mm_mul_pu16 (__m64 __A, __m64 __B)
+_mm_mul_su32 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}
@@ -2459,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A)
return (__m128i) __builtin_ia32_loadd (&__A);
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return (__m128i) __builtin_ia32_movq2dq (__A);
+}
+#endif
+
static __inline int
_mm_cvtsi128_si32 (__m128i __A)
{
@@ -2467,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A)
return __tmp;
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
#endif /* __SSE2__ */
#endif /* __SSE__ */