diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/cygwin.h | 22 | ||||
-rw-r--r-- | gcc/config/i386/djgpp.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/freebsd-aout.h | 12 | ||||
-rw-r--r-- | gcc/config/i386/freebsd64.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 362 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 51 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 665 | ||||
-rw-r--r-- | gcc/config/i386/linux64.h | 19 | ||||
-rw-r--r-- | gcc/config/i386/mingw32.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/mmintrin.h | 39 | ||||
-rw-r--r-- | gcc/config/i386/scodbx.h | 84 | ||||
-rw-r--r-- | gcc/config/i386/t-sco5gas | 2 | ||||
-rw-r--r-- | gcc/config/i386/xm-dgux.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/xm-sun.h | 21 | ||||
-rw-r--r-- | gcc/config/i386/xm-sysv3.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 115 |
17 files changed, 1012 insertions, 400 deletions
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h index f630d1f8450..03e372e04fa 100644 --- a/gcc/config/i386/cygwin.h +++ b/gcc/config/i386/cygwin.h @@ -1,6 +1,6 @@ /* Operating system specific defines to be used when targeting GCC for hosting on Windows32, using a Unix style C library and tools. - Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 + Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -134,7 +134,8 @@ Boston, MA 02111-1307, USA. */ by calling the init function from the prologue. */ #undef LIBGCC_SPEC -#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} -lgcc %{mno-cygwin:-lmoldname -lmsvcrt}" +#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} \ + -lgcc %{mno-cygwin:-lmoldname -lmingwex -lmsvcrt}" /* This macro defines names of additional specifications to put in the specs that can be used in various specifications like CC1_SPEC. Its definition @@ -306,11 +307,13 @@ do { \ #define CHECK_STACK_LIMIT 4000 /* By default, target has a 80387, uses IEEE compatible arithmetic, - and returns float values in the 387 and needs stack probes */ -#undef TARGET_SUBTARGET_DEFAULT + returns float values in the 387 and needs stack probes. + We also align doubles to 64-bits for MSVC default compatibility. */ +#undef TARGET_SUBTARGET_DEFAULT #define TARGET_SUBTARGET_DEFAULT \ - (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE) + (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE \ + | MASK_ALIGN_DOUBLE) /* This is how to output an assembler line that says to advance the location counter @@ -397,6 +400,15 @@ extern void i386_pe_unique_section PARAMS ((TREE, int)); const0_rtx)); \ } +/* Java Native Interface (JNI) methods on Win32 are invoked using the + stdcall calling convention. */ +#undef MODIFY_JNI_METHOD_CALL +#define MODIFY_JNI_METHOD_CALL(MDECL) \ + build_type_attribute_variant ((MDECL), \ + build_tree_list (get_identifier ("stdcall"), \ + NULL)) + + /* External function declarations. */ extern void i386_pe_record_external_function PARAMS ((const char *)); diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h index a271aa47cde..67807804501 100644 --- a/gcc/config/i386/djgpp.h +++ b/gcc/config/i386/djgpp.h @@ -136,6 +136,8 @@ Boston, MA 02111-1307, USA. */ #undef ASM_FILE_START #define ASM_FILE_START(FILE) \ do { \ + if (ix86_asm_dialect == ASM_INTEL) \ + fputs ("\t.intel_syntax\n", FILE); \ output_file_directive (FILE, main_input_filename); \ } while (0) diff --git a/gcc/config/i386/freebsd-aout.h b/gcc/config/i386/freebsd-aout.h index a2b616e700b..85e2703f42c 100644 --- a/gcc/config/i386/freebsd-aout.h +++ b/gcc/config/i386/freebsd-aout.h @@ -1,6 +1,6 @@ /* Definitions of target machine for GNU compiler for Intel 80386 running FreeBSD. - Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002 + Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. Contributed by Poul-Henning Kamp <phk@login.dkuug.dk> Continued development by David O'Brien <obrien@NUXI.org> @@ -94,6 +94,9 @@ Boston, MA 02111-1307, USA. */ /* Profiling routines, partially copied from i386/osfrose.h. */ +/* Tell final.c that we don't need a label passed to mcount. */ +#define NO_PROFILE_COUNTERS 1 + #undef MCOUNT_NAME #define MCOUNT_NAME "mcount" #undef PROFILE_COUNT_REGISTER @@ -112,6 +115,7 @@ Boston, MA 02111-1307, USA. */ #define TYPE_ASM_OP "\t.type\t" #define SIZE_ASM_OP "\t.size\t" +#define SET_ASM_OP "\t.set\t" /* The following macro defines the format used to output the second operand of the .type assembler directive. Different svr4 assemblers @@ -121,6 +125,12 @@ Boston, MA 02111-1307, USA. */ #define TYPE_OPERAND_FMT "@%s" +#define HANDLE_SYSV_PRAGMA 1 + +#define ASM_WEAKEN_LABEL(FILE,NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + /* Write the extra assembler code needed to declare a function's result. Most svr4 assemblers don't require any special declaration of the result value, but there are exceptions. */ diff --git a/gcc/config/i386/freebsd64.h b/gcc/config/i386/freebsd64.h index 699f4c4d344..12ca062301d 100644 --- a/gcc/config/i386/freebsd64.h +++ b/gcc/config/i386/freebsd64.h @@ -29,8 +29,7 @@ Boston, MA 02111-1307, USA. */ #undef LINK_SPEC #define LINK_SPEC "\ - %{!m32:-m elf_x86_64} \ - %{m32:-m elf_i386} \ + %{m32:-m elf_i386_fbsd} \ %{Wl,*:%*} \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4afdf668bd8..d3c9d160190 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -190,6 +190,9 @@ extern void x86_function_profiler PARAMS ((FILE *, int)); #ifdef TREE_CODE extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx)); extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int)); +extern int function_arg_pass_by_reference PARAMS ((CUMULATIVE_ARGS *, + enum machine_mode, + tree, int)); extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int)); extern rtx ix86_function_value PARAMS ((tree)); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 42f6d93d3c3..0f8c8e4c3fc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IA-32. Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002 Free Software Foundation, Inc. + 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -799,6 +799,7 @@ const struct attribute_spec ix86_attribute_table[]; static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); static int ix86_value_regno PARAMS ((enum machine_mode)); +static bool contains_128bit_aligned_vector_p PARAMS ((tree)); #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); @@ -911,6 +912,12 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, struct gcc_target targetm = TARGET_INITIALIZER; +/* The svr4 ABI for the i386 says that records and unions are returned + in memory. */ +#ifndef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 +#endif + /* Sometimes certain combinations of command options do not make sense on a particular target machine. You can define a macro `OVERRIDE_OPTIONS' to take account of this. This macro, if @@ -1021,7 +1028,7 @@ override_options () if (flag_asynchronous_unwind_tables == 2) flag_asynchronous_unwind_tables = 0; if (flag_pcc_struct_return == 2) - flag_pcc_struct_return = 1; + flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; } #ifdef SUBTARGET_OVERRIDE_OPTIONS @@ -2252,6 +2259,9 @@ function_arg (cum, mode, type, named) break; case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ case DImode: case SImode: case HImode: @@ -2282,6 +2292,90 @@ function_arg (cum, mode, type, named) return ret; } +/* Return true when TYPE should be 128bit aligned for 32bit argument passing + ABI */ +static bool +contains_128bit_aligned_vector_p (type) + tree type; +{ + enum machine_mode mode = TYPE_MODE (type); + if (SSE_REG_MODE_P (mode) + && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) + return true; + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the agregates recursivly. */ + if (TREE_CODE (type) == RECORD_TYPE + || TREE_CODE (type) == UNION_TYPE + || TREE_CODE (type) == QUAL_UNION_TYPE) + { + tree field; + + if (TYPE_BINFO (type) != NULL + && TYPE_BINFO_BASETYPES (type) != NULL) + { + tree bases = TYPE_BINFO_BASETYPES (type); + int n_bases = TREE_VEC_LENGTH (bases); + int i; + + for (i = 0; i < n_bases; ++i) + { + tree binfo = TREE_VEC_ELT (bases, i); + tree type = BINFO_TYPE (binfo); + + if (contains_128bit_aligned_vector_p (type)) + return true; + } + } + /* And now merge the fields of structure. */ + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL + && contains_128bit_aligned_vector_p (TREE_TYPE (field))) + return true; + } + } + /* Just for use if some languages passes arrays by value. */ + else if (TREE_CODE (type) == ARRAY_TYPE) + { + if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) + return true; + } + else + abort (); + } + return false; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +int +function_arg_pass_by_reference (cum, mode, type, named) + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; + enum machine_mode mode ATTRIBUTE_UNUSED; + tree type; + int named ATTRIBUTE_UNUSED; +{ + if (!TARGET_64BIT) + return 0; + + if (type && int_size_in_bytes (type) == -1) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference\n"); + return 1; + } + + return 0; +} + /* Gives the alignment boundary, in bits, of an argument with the specified mode and type. */ @@ -2291,14 +2385,34 @@ ix86_function_arg_boundary (mode, type) tree type; { int align; - if (!TARGET_64BIT) - return PARM_BOUNDARY; if (type) align = TYPE_ALIGN (type); else align = GET_MODE_ALIGNMENT (mode); if (align < PARM_BOUNDARY) align = PARM_BOUNDARY; + if (!TARGET_64BIT) + { + /* i386 ABI defines all arguments to be 4 byte aligned. We have to + make an exception for SSE modes since these require 128bit + alignment. + + The handling here differs from field_alignment. ICC aligns MMX + arguments to 4 byte boundaries, while structure fields are aligned + to 8 byte boundaries. */ + if (!type) + { + if (!SSE_REG_MODE_P (mode)) + align = PARM_BOUNDARY; + } + else + { + if (!contains_128bit_aligned_vector_p (type)) + align = PARM_BOUNDARY; + } + if (align != PARM_BOUNDARY && !TARGET_SSE) + abort(); + } if (align > 128) align = 128; return align; @@ -2488,6 +2602,8 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) /* Indicate to allocate space on the stack for varargs save area. */ ix86_save_varrargs_registers = 1; + cfun->stack_alignment_needed = 128; + fntype = TREE_TYPE (current_function_decl); stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) @@ -2637,6 +2753,7 @@ ix86_va_arg (valist, type) rtx lab_false, lab_over = NULL_RTX; rtx addr_rtx, r; rtx container; + int indirect_p = 0; /* Only 64bit target needs something special. */ if (!TARGET_64BIT) @@ -2656,6 +2773,13 @@ ix86_va_arg (valist, type) sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav); size = int_size_in_bytes (type); + if (size == -1) + { + /* Passed by reference. */ + indirect_p = 1; + type = build_pointer_type (type); + size = int_size_in_bytes (type); + } rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; container = construct_container (TYPE_MODE (type), type, 0, @@ -2760,10 +2884,12 @@ ix86_va_arg (valist, type) { int i; rtx mem; + rtx x; /* Never use the memory itself, as it has the alias set. */ - addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0); - mem = gen_rtx_MEM (BLKmode, addr_rtx); + x = XEXP (assign_temp (type, 0, 1, 0), 0); + mem = gen_rtx_MEM (BLKmode, x); + force_operand (x, addr_rtx); set_mem_alias_set (mem, get_varargs_alias_set ()); set_mem_align (mem, BITS_PER_UNIT); @@ -2846,6 +2972,13 @@ ix86_va_arg (valist, type) if (container) emit_label (lab_over); + if (indirect_p) + { + r = gen_rtx_MEM (Pmode, addr_rtx); + set_mem_alias_set (r, get_varargs_alias_set ()); + emit_move_insn (addr_rtx, r); + } + return addr_rtx; } @@ -3401,6 +3534,19 @@ non_q_regs_operand (op, mode) return NON_QI_REG_P (op); } +/* Return 1 when OP is operand acceptable for standard SSE move. */ +int +vector_move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (nonimmediate_operand (op, mode)) + return 1; + if (GET_MODE (op) != mode && mode != VOIDmode) + return 0; + return (op == CONST0_RTX (GET_MODE (op))); +} + /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */ int @@ -4225,7 +4371,8 @@ ix86_save_reg (regno, maybe_eh_return) && regno == REAL_PIC_OFFSET_TABLE_REGNUM && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] || current_function_profile - || current_function_calls_eh_return)) + || current_function_calls_eh_return + || current_function_uses_const_pool)) { if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) return 0; @@ -4516,6 +4663,10 @@ ix86_expand_prologue () CALL_INSN_FUNCTION_USAGE (insn) = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0), CALL_INSN_FUNCTION_USAGE (insn)); + + /* Don't allow scheduling pass to move insns across __alloca + call. */ + emit_insn (gen_blockage (const0_rtx)); } if (use_mov) { @@ -7484,12 +7635,12 @@ output_fp_compare (insn, operands, eflags_p, unordered_p) if (unordered_p) return "ucomiss\t{%1, %0|%0, %1}"; else - return "comiss\t{%1, %0|%0, %y}"; + return "comiss\t{%1, %0|%0, %1}"; else if (unordered_p) return "ucomisd\t{%1, %0|%0, %1}"; else - return "comisd\t{%1, %0|%0, %y}"; + return "comisd\t{%1, %0|%0, %1}"; } if (! STACK_TOP_P (cmp_op0)) @@ -7777,9 +7928,17 @@ ix86_expand_move (mode, operands) if (strict) ; - else if (GET_CODE (op1) == CONST_DOUBLE - && register_operand (op0, mode)) - op1 = validize_mem (force_const_mem (mode, op1)); + else if (GET_CODE (op1) == CONST_DOUBLE) + { + op1 = validize_mem (force_const_mem (mode, op1)); + if (!register_operand (op0, mode)) + { + rtx temp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); + emit_move_insn (op0, temp); + return; + } + } } } @@ -7799,8 +7958,12 @@ ix86_expand_vector_move (mode, operands) to handle some of them more efficiently. */ if ((reload_in_progress | reload_completed) == 0 && register_operand (operands[0], mode) - && CONSTANT_P (operands[1])) - operands[1] = force_const_mem (mode, operands[1]); + && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode)) + { + operands[1] = force_const_mem (mode, operands[1]); + emit_move_insn (operands[0], operands[1]); + return; + } /* Make operand1 a register if it isn't already. */ if (!no_new_pseudos @@ -9219,11 +9382,11 @@ ix86_expand_int_movcc (operands) /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics done in proper mode to match. */ if (diff == 1) - tmp = out; + tmp = copy_rtx (out); else { rtx out1; - out1 = out; + out1 = copy_rtx (out); tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); nops++; if (diff & 1) @@ -9241,9 +9404,9 @@ ix86_expand_int_movcc (operands) && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out)) { if (nops == 1) - out = force_operand (tmp, out); + out = force_operand (tmp, copy_rtx (out)); else - emit_insn (gen_rtx_SET (VOIDmode, out, tmp)); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); } if (out != operands[0]) emit_move_insn (operands[0], copy_rtx (out)); @@ -9822,15 +9985,24 @@ ix86_split_long_move (operands) Do an lea to the last part and use only one colliding move. */ else if (collisions > 1) { + rtx base; + collisions = 1; - emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1], - XEXP (part[1][0], 0))); - part[1][0] = change_address (part[1][0], - TARGET_64BIT ? DImode : SImode, - part[0][nparts - 1]); - part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD); + + base = part[0][nparts - 1]; + + /* Handle the case when the last part isn't valid for lea. + Happens in 64-bit mode storing the 12-byte XFmode. */ + if (GET_MODE (base) != Pmode) + base = gen_rtx_REG (Pmode, REGNO (base)); + + emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); + part[1][0] = replace_equiv_address (part[1][0], base); + part[1][1] = replace_equiv_address (part[1][1], + plus_constant (base, UNITS_PER_WORD)); if (nparts == 3) - part[1][2] = adjust_address (part[1][0], VOIDmode, 8); + part[1][2] = replace_equiv_address (part[1][2], + plus_constant (base, 8)); } } @@ -10973,7 +11145,8 @@ memory_address_length (addr) if (disp) { if (GET_CODE (disp) == CONST_INT - && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')) + && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K') + && base) len = 1; else len = 4; @@ -11036,6 +11209,26 @@ ix86_attr_length_address_default (insn) rtx insn; { int i; + + if (get_attr_type (insn) == TYPE_LEA) + { + rtx set = PATTERN (insn); + if (GET_CODE (set) == SET) + ; + else if (GET_CODE (set) == PARALLEL + && GET_CODE (XVECEXP (set, 0, 0)) == SET) + set = XVECEXP (set, 0, 0); + else + { +#ifdef ENABLE_CHECKING + abort (); +#endif + return 0; + } + + return memory_address_length (SET_SRC (set)); + } + extract_insn_cached (insn); for (i = recog_data.n_operands - 1; i >= 0; --i) if (GET_CODE (recog_data.operand[i]) == MEM) @@ -11834,7 +12027,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) #define def_builtin(MASK, NAME, TYPE, CODE) \ do { \ - if ((MASK) & target_flags) \ + if ((MASK) & target_flags \ + && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ NULL, NULL_TREE); \ } while (0) @@ -11851,6 +12045,8 @@ struct builtin_description /* Used for builtins that are enabled both by -msse and -msse2. */ #define MASK_SSE1 (MASK_SSE | MASK_SSE2) +#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) +#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) static const struct builtin_description bdesc_comi[] = { @@ -11933,9 +12129,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, @@ -11984,6 +12182,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12056,11 +12255,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, @@ -12134,6 +12333,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } }; @@ -12149,8 +12349,10 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, @@ -12172,6 +12374,8 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, @@ -12197,7 +12401,11 @@ ix86_init_mmx_sse_builtins () size_t i; tree pchar_type_node = build_pointer_type (char_type_node); + tree pcchar_type_node = build_pointer_type ( + build_type_variant (char_type_node, 1, 0)); tree pfloat_type_node = build_pointer_type (float_type_node); + tree pcfloat_type_node = build_pointer_type ( + build_type_variant (float_type_node, 1, 0)); tree pv2si_type_node = build_pointer_type (V2SI_type_node); tree pv2di_type_node = build_pointer_type (V2DI_type_node); tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); @@ -12213,11 +12421,18 @@ ix86_init_mmx_sse_builtins () tree int_ftype_v4sf = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE); + tree int64_ftype_v4sf + = build_function_type_list (long_long_integer_type_node, + V4SF_type_node, NULL_TREE); tree int_ftype_v8qi = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf_int = build_function_type_list (V4SF_type_node, V4SF_type_node, integer_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int64 + = build_function_type_list (V4SF_type_node, + V4SF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2si = build_function_type_list (V4SF_type_node, V4SF_type_node, V2SI_type_node, NULL_TREE); @@ -12270,8 +12485,8 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (void_type_node, V8QI_type_node, V8QI_type_node, pchar_type_node, NULL_TREE); - tree v4sf_ftype_pfloat - = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE); + tree v4sf_ftype_pcfloat + = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); /* @@@ the type is bogus */ tree v4sf_ftype_v4sf_pv2si = build_function_type_list (V4SF_type_node, @@ -12326,7 +12541,11 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (V2SI_type_node, V2SF_type_node, V2SF_type_node, NULL_TREE); tree pint_type_node = build_pointer_type (integer_type_node); + tree pcint_type_node = build_pointer_type ( + build_type_variant (integer_type_node, 1, 0)); tree pdouble_type_node = build_pointer_type (double_type_node); + tree pcdouble_type_node = build_pointer_type ( + build_type_variant (double_type_node, 1, 0)); tree int_ftype_v2df_v2df = build_function_type_list (integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); @@ -12338,8 +12557,8 @@ ix86_init_mmx_sse_builtins () tree ti_ftype_ti_ti = build_function_type_list (intTI_type_node, intTI_type_node, intTI_type_node, NULL_TREE); - tree void_ftype_pvoid - = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + tree void_ftype_pcvoid + = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); tree v2di_ftype_di = build_function_type_list (V2DI_type_node, long_long_unsigned_type_node, NULL_TREE); @@ -12364,9 +12583,16 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); tree int_ftype_v2df = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); + tree int64_ftype_v2df + = build_function_type_list (long_long_integer_type_node, + V2DF_type_node, NULL_TREE); tree v2df_ftype_v2df_int = build_function_type_list (V2DF_type_node, V2DF_type_node, integer_type_node, NULL_TREE); + tree v2df_ftype_v2df_int64 + = build_function_type_list (V2DF_type_node, + V2DF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2df = build_function_type_list (V4SF_type_node, V4SF_type_node, V2DF_type_node, NULL_TREE); @@ -12394,8 +12620,8 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (void_type_node, V16QI_type_node, V16QI_type_node, pchar_type_node, NULL_TREE); - tree v2df_ftype_pdouble - = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE); + tree v2df_ftype_pcdouble + = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); tree v2df_ftype_v2df_v2df = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); @@ -12454,16 +12680,16 @@ ix86_init_mmx_sse_builtins () V16QI_type_node, V16QI_type_node, NULL_TREE); tree int_ftype_v16qi = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); - tree v16qi_ftype_pchar - = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE); + tree v16qi_ftype_pcchar + = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); tree void_ftype_pchar_v16qi = build_function_type_list (void_type_node, pchar_type_node, V16QI_type_node, NULL_TREE); - tree v4si_ftype_pchar - = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE); - tree void_ftype_pchar_v4si + tree v4si_ftype_pcint + = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); + tree void_ftype_pcint_v4si = build_function_type_list (void_type_node, - pchar_type_node, V4SI_type_node, NULL_TREE); + pcint_type_node, V4SI_type_node, NULL_TREE); tree v2di_ftype_v2di = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); @@ -12539,8 +12765,6 @@ ix86_init_mmx_sse_builtins () /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); @@ -12566,21 +12790,26 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); + def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); @@ -12648,9 +12877,9 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); - def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); @@ -12689,33 +12918,36 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1); - def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); - def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH); + def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); - def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA); - def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU); - def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD); + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); - def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED); + def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); @@ -12795,6 +13027,13 @@ ix86_expand_binop_builtin (icode, arglist, target) || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); + if (GET_MODE (op1) == SImode && mode1 == TImode) + { + rtx x = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadd (x, op1)); + op1 = gen_lowpart (TImode, x); + } + /* In case the insn wants input operands in modes different from the result, abort. */ if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) @@ -12837,9 +13076,7 @@ ix86_expand_store_builtin (icode, arglist) op1 = safe_vector_operand (op1, mode1); op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - - if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); + op1 = copy_to_mode_reg (mode1, op1); pat = GEN_FCN (icode) (op0, op1); if (pat) @@ -13828,9 +14065,10 @@ ix86_hard_regno_mode_ok (regno, mode) if (FP_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (SSE_REGNO_P (regno)) - return VALID_SSE_REG_MODE (mode); + return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0); if (MMX_REGNO_P (regno)) - return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode); + return (TARGET_MMX + ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0); /* We handle both integer and floats in the general purpose registers. In future we should be able to handle vector modes as well. */ if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) @@ -14299,7 +14537,7 @@ x86_function_profiler (file, labelno) else { #ifndef NO_PROFILE_COUNTERS - fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno, + fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, PROFILE_COUNT_REGISTER); #endif fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 493a2b5bf9c..ffca44fd57b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -487,9 +487,12 @@ extern int x86_prefetch_sse; if (TARGET_64BIT) \ { \ builtin_assert ("cpu=x86_64"); \ - builtin_assert ("machine=x86_64"); \ + builtin_define ("__amd64"); \ + builtin_define ("__amd64__"); \ builtin_define ("__x86_64"); \ builtin_define ("__x86_64__"); \ + builtin_define ("__amd64"); \ + builtin_define ("__amd64__"); \ } \ else \ { \ @@ -1047,7 +1050,7 @@ do { \ && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \ || ((MODE1) == DImode && TARGET_64BIT)) \ && ((MODE2) == HImode || (MODE2) == SImode \ - || ((MODE1) == QImode \ + || ((MODE2) == QImode \ && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL)) \ || ((MODE2) == DImode && TARGET_64BIT)))) @@ -1522,6 +1525,20 @@ enum reg_class || ((CLASS) == SIREG) \ || ((CLASS) == DIREG)) +/* Return a class of registers that cannot change FROM mode to TO mode. + + x87 registers can't do subreg as all values are reformated to extended + precision. XMM registers does not support with nonzero offsets equal + to 4, 8 and 12 otherwise valid for integer registers. Since we can't + determine these, prohibit all nonparadoxical subregs changing size. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (TO) < GET_MODE_SIZE (FROM) \ + ? reg_classes_intersect_p (FLOAT_SSE_REGS, (CLASS)) \ + || MAYBE_MMX_CLASS_P (CLASS) \ + : GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0) + /* A C statement that adds to CLOBBERS any hard regs the port wishes to automatically clobber for all asms. @@ -1716,17 +1733,28 @@ typedef struct ix86_args { #define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) 0 +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) \ + function_arg_pass_by_reference(&CUM, MODE, TYPE, NAMED) + /* If PIC, we cannot make sibling calls to global functions because the PLT requires %ebx live. - If we are returning floats on the register stack, we cannot make - sibling calls to functions that return floats. (The stack adjust - instruction will wind up after the sibcall jump, and not be executed.) */ + If we are returning floats on the 80387 register stack, we cannot + make a sibcall from a function that doesn't return a float to a + function that does or, conversely, from a function that does return + a float to a function that doesn't; the necessary stack adjustment + would not be executed. */ #define FUNCTION_OK_FOR_SIBCALL(DECL) \ ((DECL) \ && (! flag_pic || ! TREE_PUBLIC (DECL)) \ && (! TARGET_FLOAT_RETURNS_IN_80387 \ - || ! FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \ - || FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl)))))) + || (FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL)))) \ + == FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl))))))) /* Perform any needed actions needed for a function that is receiving a variable number of arguments. @@ -2068,9 +2096,12 @@ enum ix86_builtins IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS, + IX86_BUILTIN_CVTSI642SS, IX86_BUILTIN_CVTSS2SI, + IX86_BUILTIN_CVTSS2SI64, IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, + IX86_BUILTIN_CVTTSS2SI64, IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, @@ -2116,6 +2147,7 @@ enum ix86_builtins IX86_BUILTIN_PADDB, IX86_BUILTIN_PADDW, IX86_BUILTIN_PADDD, + IX86_BUILTIN_PADDQ, IX86_BUILTIN_PADDSB, IX86_BUILTIN_PADDSW, IX86_BUILTIN_PADDUSB, @@ -2123,6 +2155,7 @@ enum ix86_builtins IX86_BUILTIN_PSUBB, IX86_BUILTIN_PSUBW, IX86_BUILTIN_PSUBD, + IX86_BUILTIN_PSUBQ, IX86_BUILTIN_PSUBSB, IX86_BUILTIN_PSUBSW, IX86_BUILTIN_PSUBUSB, @@ -2327,11 +2360,14 @@ enum ix86_builtins IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, + IX86_BUILTIN_CVTSI642SD, IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS, IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, + IX86_BUILTIN_CVTTSD2SI64, IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, @@ -3286,6 +3322,7 @@ do { \ {"register_and_not_any_fp_reg_operand", {REG}}, \ {"fp_register_operand", {REG}}, \ {"register_and_not_fp_reg_operand", {REG}}, \ + {"vector_move_operand", {CONST_VECTOR, SUBREG, REG, MEM}}, \ /* A list of predicates that do special things with modes, and so should not elicit warnings for VOIDmode match_operand. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index befbfe49569..edbb7163646 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,5 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -267,6 +268,8 @@ (define_attr "length" "" (cond [(eq_attr "type" "other,multi,fistp") (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) (eq_attr "unit" "i387") (plus (const_int 2) (plus (attr "prefix_data16") @@ -1099,25 +1102,20 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "1")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") + (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: - if (get_attr_mode (insn) == TImode) + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; case TYPE_MMXMOV: - if (get_attr_mode (insn) == DImode) + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1131,17 +1129,16 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") + (cond [(eq_attr "alternative" "2,3,4") (const_string "mmxmov") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "5,6,7") (const_string "ssemov") (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") - (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address @@ -1214,14 +1211,9 @@ [(set_attr "type" "push") (set_attr "mode" "QI")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m") - (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) @@ -1238,36 +1230,35 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "imov") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "imov") (and (ne (symbol_ref "TARGET_MOVX") (const_int 0)) - (eq_attr "alternative" "0,1,3,4")) + (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "SI") - (and (eq_attr "alternative" "0,1") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "SI") ] - (const_string "HI"))) - (set_attr "modrm" "0,*,*,0,*,*")]) + (const_string "HI")))]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address @@ -1488,7 +1479,7 @@ (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL" + "! TARGET_PARTIAL_REG_STALL || optimize_size" { /* Don't generate memory->memory moves, go through a register. */ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) @@ -1498,7 +1489,7 @@ (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "! TARGET_PARTIAL_REG_STALL + "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") @@ -12839,9 +12830,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -12857,9 +12848,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -13124,9 +13115,9 @@ [(set_attr "type" "ibr") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 5))) (set_attr "modrm" "0")]) @@ -13250,9 +13241,9 @@ (set (attr "length") (if_then_else (and (eq_attr "alternative" "0") (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124)))) + (const_int 128)))) (const_int 2) (const_int 16))) ;; We don't know the type before shorten branches. Optimistically expect @@ -13616,11 +13607,10 @@ "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13682,7 +13672,7 @@ (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); @@ -14823,7 +14813,7 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -16734,7 +16724,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") + [(match_operand 4 "nonimmediate_operand" "") (match_operand 5 "nonimmediate_operand" "")]) (match_operand 2 "nonmemory_operand" "") (match_operand 3 "nonmemory_operand" "")))] @@ -16746,13 +16736,16 @@ (subreg:TI (match_dup 7) 0)))] { PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) + if (!sse_comparison_operator (operands[1], VOIDmode) + || !rtx_equal_p (operands[0], operands[4])) { rtx tmp = operands[5]; operands[5] = operands[4]; operands[4] = tmp; PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); } + if (!rtx_equal_p (operands[0], operands[4])) + abort (); if (const0_operand (operands[2], GET_MODE (operands[0]))) { operands[7] = operands[3]; @@ -16853,6 +16846,10 @@ operands[2] = gen_lowpart (SImode, operands[2]); PUT_MODE (operands[3], SImode);") +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand 1 "aligned_operand" "") @@ -16861,12 +16858,11 @@ (set (match_operand 0 "register_operand" "") (and (match_dup 1) (match_dup 2)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - /* Ensure that the operand will remain sign extended immediate. */ - && INTVAL (operands[2]) >= 0 - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(parallel [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) (const_int 0))) @@ -16879,17 +16875,20 @@ operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") -; Don't promote the QImode tests, as i386 don't have encoding of -; the test instruction with 32bit sign extended immediate and thus -; the code grows. +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand:HI 0 "aligned_operand" "") (match_operand:HI 1 "const_int_operand" "")) (const_int 0)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && GET_MODE (operands[0]) == HImode" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! optimize_size" [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] @@ -17848,67 +17847,92 @@ ;; Moves for SSE/MMX regs. (define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv2di_internal" - [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movdqa\t{%1, %0|%0, %1}" + "@ + pxor\t%0, %0 + movdqa\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + "TARGET_3DNOW + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) @@ -17919,35 +17943,44 @@ }) (define_insn "movv2df_internal" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movapd\t{%1, %0|%0, %1}" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorpd\t%0, %0 + movapd\t{%1, %0|%0, %1} + movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "movv8hi_internal" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv16qi_internal" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_expand "movv2df" - [(set (match_operand:V2DF 0 "general_operand" "") - (match_operand:V2DF 1 "general_operand" ""))] + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V2DFmode, operands); @@ -17955,8 +17988,8 @@ }) (define_expand "movv8hi" - [(set (match_operand:V8HI 0 "general_operand" "") - (match_operand:V8HI 1 "general_operand" ""))] + [(set (match_operand:V8HI 0 "nonimmediate_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V8HImode, operands); @@ -17964,8 +17997,8 @@ }) (define_expand "movv16qi" - [(set (match_operand:V16QI 0 "general_operand" "") - (match_operand:V16QI 1 "general_operand" ""))] + [(set (match_operand:V16QI 0 "nonimmediate_operand" "") + (match_operand:V16QI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V16QImode, operands); @@ -17973,8 +18006,8 @@ }) (define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SFmode, operands); @@ -17982,8 +18015,8 @@ }) (define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); @@ -17991,8 +18024,8 @@ }) (define_expand "movv2di" - [(set (match_operand:V2DI 0 "general_operand" "") - (match_operand:V2DI 1 "general_operand" ""))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V2DI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V2DImode, operands); @@ -18000,8 +18033,8 @@ }) (define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V2SImode, operands); @@ -18009,8 +18042,8 @@ }) (define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V4HImode, operands); @@ -18018,8 +18051,8 @@ }) (define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "") + (match_operand:V8QI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V8QImode, operands); @@ -18027,14 +18060,97 @@ }) (define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] "TARGET_3DNOW" { ix86_expand_vector_move (V2SFmode, operands); DONE; }) +(define_insn "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "register_operand" "y"))] + "TARGET_3DNOW" + "#") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + + (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -18158,8 +18274,9 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ xorps\t%0, %0 movaps\t{%1, %0|%0, %1} @@ -18169,7 +18286,7 @@ (define_insn "*movti_rex64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ @@ -18191,29 +18308,56 @@ ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups -(define_insn "sse_movaps" +(define_expand "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVA))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movaps (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movaps_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov,ssemov") (set_attr "mode" "V4SF")]) -(define_insn "sse_movups" +(define_expand "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movups (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movups_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movups\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssecvt") (set_attr "mode" "V4SF")]) - ;; SSE Strange Moves. (define_insn "sse_movmskps" @@ -18329,11 +18473,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18804,7 +18958,7 @@ (define_insn "sse2_nandv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -18908,7 +19062,7 @@ (match_operator:V4SI 3 "sse_comparison_operator" [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) + (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" "cmp%D3ss\t{%2, %0|%0, %2}" @@ -19093,6 +19247,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0,0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19103,6 +19270,17 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19114,6 +19292,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,vector")]) + ;; MMX insns @@ -19121,7 +19311,7 @@ (define_insn "addv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddb\t{%2, %0|%0, %2}" @@ -19130,7 +19320,7 @@ (define_insn "addv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddw\t{%2, %0|%0, %2}" @@ -19139,16 +19329,27 @@ (define_insn "addv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddd\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "ssaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsb\t{%2, %0|%0, %2}" @@ -19157,7 +19358,7 @@ (define_insn "ssaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsw\t{%2, %0|%0, %2}" @@ -19166,7 +19367,7 @@ (define_insn "usaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusb\t{%2, %0|%0, %2}" @@ -19175,7 +19376,7 @@ (define_insn "usaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusw\t{%2, %0|%0, %2}" @@ -19209,6 +19410,17 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "sssubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") @@ -19312,7 +19524,7 @@ (define_insn "mmx_iordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") + [(ior:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19323,7 +19535,7 @@ (define_insn "mmx_xordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") + [(xor:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19346,7 +19558,7 @@ (define_insn "mmx_anddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") + [(and:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19805,17 +20017,17 @@ (define_insn "ldmxcsr" [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_LDMXCSR)] - "TARGET_MMX" + "TARGET_SSE" "ldmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "load")]) (define_insn "stmxcsr" [(set (match_operand:SI 0 "memory_operand" "=m") (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_MMX" + "TARGET_SSE" "stmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "store")]) (define_expand "sfence" @@ -20471,7 +20683,7 @@ (match_operator:V2DI 3 "sse_comparison_operator" [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "x")]) - (match_dup 1) + (subreg:V2DI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE2" "cmp%D3sd\t{%2, %0|%0, %2}" @@ -20692,6 +20904,15 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + (define_insn "cvttsd2si" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") @@ -20701,6 +20922,16 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector,vector")]) + (define_insn "cvtsi2sd" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") @@ -20713,6 +20944,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "vector,direct")]) + ;; Conversions between SF and DF (define_insn "cvtsd2ss" @@ -20770,7 +21014,7 @@ (define_insn "addv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddb\t{%2, %0|%0, %2}" @@ -20779,7 +21023,7 @@ (define_insn "addv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddw\t{%2, %0|%0, %2}" @@ -20788,7 +21032,7 @@ (define_insn "addv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI (match_operand:V4SI 1 "register_operand" "0") + (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddd\t{%2, %0|%0, %2}" @@ -20797,7 +21041,7 @@ (define_insn "addv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI (match_operand:V2DI 1 "register_operand" "0") + (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddq\t{%2, %0|%0, %2}" @@ -20806,7 +21050,7 @@ (define_insn "ssaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsb\t{%2, %0|%0, %2}" @@ -20815,7 +21059,7 @@ (define_insn "ssaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsw\t{%2, %0|%0, %2}" @@ -20824,7 +21068,7 @@ (define_insn "usaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusb\t{%2, %0|%0, %2}" @@ -20833,7 +21077,7 @@ (define_insn "usaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusw\t{%2, %0|%0, %2}" @@ -21069,7 +21313,8 @@ [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0") (vec_duplicate:V8HI - (match_operand:SI 2 "nonimmediate_operand" "rm")) + (truncate:HI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) (match_operand:SI 3 "immediate_operand" "i")))] "TARGET_SSE2" "pinsrw\t{%3, %2, %0|%0, %2, %3}" @@ -21218,7 +21463,7 @@ (define_insn "ashrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psraw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21227,7 +21472,7 @@ (define_insn "ashrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrad\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21236,7 +21481,7 @@ (define_insn "lshrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21245,7 +21490,7 @@ (define_insn "lshrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21254,7 +21499,7 @@ (define_insn "lshrv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21263,7 +21508,7 @@ (define_insn "ashlv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21272,7 +21517,7 @@ (define_insn "ashlv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "pslld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21281,7 +21526,7 @@ (define_insn "ashlv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21595,45 +21840,41 @@ (define_insn "sse2_movapd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movapd\t{%1, %0|%0, %1} - movapd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "sse2_movupd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movupd\t{%1, %0|%0, %1} - movupd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movupd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) (define_insn "sse2_movdqa" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movdqa\t{%1, %0|%0, %1} - movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "TI")]) (define_insn "sse2_movdqu" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movdqu\t{%1, %0|%0, %1} - movdqu\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) @@ -21641,24 +21882,48 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") (parallel [(const_int 0)])))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movdq2q\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq2dq" [(set (match_operand:V2DI 0 "register_operand" "=x,?x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") (const_int 0)))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssemov") (set_attr "mode" "TI")]) +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_concat:V2DI (vec_select:DI @@ -21673,7 +21938,7 @@ (define_insn "sse2_loadd" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_merge:V4SI - (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) (const_vector:V4SI [(const_int 0) (const_int 0) (const_int 0) @@ -21716,11 +21981,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index ae346e6d518..7a9e0ba989f 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -1,5 +1,5 @@ /* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format. - Copyright (C) 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h. This file is part of GNU CC. @@ -36,6 +36,11 @@ Boston, MA 02111-1307, USA. */ builtin_define ("__PIC__"); \ builtin_define ("__pic__"); \ } \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__LP64__"); \ + builtin_define ("_LP64"); \ + } \ } \ while (0) @@ -116,17 +121,17 @@ Boston, MA 02111-1307, USA. */ (FS)->regs.reg[0].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[0].loc.offset = (long)&sc_->rax - new_cfa_; \ (FS)->regs.reg[1].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[1].loc.offset = (long)&sc_->rbx - new_cfa_; \ + (FS)->regs.reg[1].loc.offset = (long)&sc_->rdx - new_cfa_; \ (FS)->regs.reg[2].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[2].loc.offset = (long)&sc_->rcx - new_cfa_; \ (FS)->regs.reg[3].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[3].loc.offset = (long)&sc_->rdx - new_cfa_; \ + (FS)->regs.reg[3].loc.offset = (long)&sc_->rbx - new_cfa_; \ (FS)->regs.reg[4].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[4].loc.offset = (long)&sc_->rbp - new_cfa_; \ + (FS)->regs.reg[4].loc.offset = (long)&sc_->rsi - new_cfa_; \ (FS)->regs.reg[5].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[5].loc.offset = (long)&sc_->rsi - new_cfa_; \ + (FS)->regs.reg[5].loc.offset = (long)&sc_->rdi - new_cfa_; \ (FS)->regs.reg[6].how = REG_SAVED_OFFSET; \ - (FS)->regs.reg[6].loc.offset = (long)&sc_->rdi - new_cfa_; \ + (FS)->regs.reg[6].loc.offset = (long)&sc_->rbp - new_cfa_; \ (FS)->regs.reg[8].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[8].loc.offset = (long)&sc_->r8 - new_cfa_; \ (FS)->regs.reg[9].how = REG_SAVED_OFFSET; \ @@ -143,6 +148,8 @@ Boston, MA 02111-1307, USA. */ (FS)->regs.reg[14].loc.offset = (long)&sc_->r14 - new_cfa_; \ (FS)->regs.reg[15].how = REG_SAVED_OFFSET; \ (FS)->regs.reg[15].loc.offset = (long)&sc_->r15 - new_cfa_; \ + (FS)->regs.reg[16].how = REG_SAVED_OFFSET; \ + (FS)->regs.reg[16].loc.offset = (long)&sc_->rip - new_cfa_; \ (FS)->retaddr_column = 16; \ goto SUCCESS; \ } while (0) diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h index e7c5e8b6bcc..7f62fbd5624 100644 --- a/gcc/config/i386/mingw32.h +++ b/gcc/config/i386/mingw32.h @@ -1,6 +1,7 @@ /* Operating system specific defines to be used when targeting GCC for hosting on Windows32, using GNU tools and the Windows32 API Library. - Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. This file is part of GNU CC. @@ -89,7 +90,7 @@ Boston, MA 02111-1307, USA. */ /* Include in the mingw32 libraries with libgcc */ #undef LIBGCC_SPEC #define LIBGCC_SPEC \ - "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmsvcrt" + "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmingwex -lmsvcrt" #undef STARTFILE_SPEC #define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \ diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index 52e5195fbaf..7b4aa014645 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -56,6 +56,22 @@ _mm_cvtsi32_si64 (int __i) return (__m64) __tmp; } +#ifdef __x86_64__ +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_cvtsi64x_si64 (long long __i) +{ + return (__m64) __i; +} + +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_set_pi64x (long long __i) +{ + return (__m64) __i; +} +#endif + /* Convert the lower 32 bits of the __m64 object into an integer. */ static __inline int _mm_cvtsi64_si32 (__m64 __i) @@ -64,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i) return __tmp; } +#ifdef __x86_64__ +/* Convert the lower 32 bits of the __m64 object into an integer. */ +static __inline long long +_mm_cvtsi64_si64x (__m64 __i) +{ + return (long long)__i; +} +#endif + /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with signed saturation. */ @@ -160,6 +185,13 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); } +/* Add the 64-bit values in M1 to the 64-bit values in M2. */ +static __inline __m64 +_mm_add_si64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); +} + /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed saturated arithmetic. */ static __inline __m64 @@ -213,6 +245,13 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2) return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); } +/* Add the 64-bit values in M1 to the 64-bit values in M2. */ +static __inline __m64 +_mm_sub_si64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); +} + /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed saturating arithmetic. */ static __inline __m64 diff --git a/gcc/config/i386/scodbx.h b/gcc/config/i386/scodbx.h deleted file mode 100644 index 7da93053256..00000000000 --- a/gcc/config/i386/scodbx.h +++ /dev/null @@ -1,84 +0,0 @@ -/* Definitions for Intel 386 running SCO Unix System V, - using dbx-in-coff encapsulation. - Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#include "i386/svr3dbx.h" - -/* Overridden defines for SCO systems from sco.h. */ - -/* By default, target has a 80387, uses IEEE compatible arithmetic, - and returns float values in the 387, ie, - (TARGET_80387 | TARGET_FLOAT_RETURNS_IN_80387) - - SCO's software emulation of a 387 fails to handle the `fucomp' - opcode. fucomp is only used when generating IEEE compliant code. - So don't make TARGET_IEEE_FP default for SCO. */ - -#undef TARGET_SUBTARGET_DEFAULT -#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_FLOAT_RETURNS) - -/* Use crt1.o as a startup file and crtn.o as a closing file. */ - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC \ - "%{!r:%{!z:svr3.ifile%s}%{z:svr3z.ifile%s}}\ - %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}" - -/* Library spec, including SCO international language support. */ - -#undef LIB_SPEC -#define LIB_SPEC \ - "%{p:-L/usr/lib/libp}%{pg:-L/usr/lib/libp} %{scointl:libintl.a%s} -lc" - -/* Specify predefined symbols in preprocessor. */ - -#undef CPP_PREDEFINES -#define CPP_PREDEFINES "-Dunix -DM_UNIX -DM_I386 -DM_COFF -DM_WORDSWAP -Asystem=svr3" - -#undef CPP_SPEC -#define CPP_SPEC "%(cpp_cpu) %{scointl:-DM_INTERNAT}" - -/* This spec is used for telling cpp whether char is signed or not. */ - -#undef SIGNED_CHAR_SPEC -#if DEFAULT_SIGNED_CHAR -#define SIGNED_CHAR_SPEC \ - "%{funsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}" -#else -#define SIGNED_CHAR_SPEC \ - "%{!fsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}" -#endif - -/* caller has to pop the extra argument passed to functions that return - structures. */ - -#undef RETURN_POPS_ARGS -#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE) \ - ((FUNDECL) && TREE_CODE (FUNDECL) == IDENTIFIER_NODE ? 0 \ - : (TARGET_RTD \ - && (TYPE_ARG_TYPES (FUNTYPE) == 0 \ - || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE))) \ - == void_type_node))) ? (SIZE) \ - : 0) -/* On other 386 systems, the last line looks like this: - : (aggregate_value_p (TREE_TYPE (FUNTYPE))) ? GET_MODE_SIZE (Pmode) : 0) */ - -/* Handle #pragma pack. */ -#define HANDLE_SYSV_PRAGMA diff --git a/gcc/config/i386/t-sco5gas b/gcc/config/i386/t-sco5gas index 2d0b48a6292..edeb554eea0 100644 --- a/gcc/config/i386/t-sco5gas +++ b/gcc/config/i386/t-sco5gas @@ -1,6 +1,6 @@ # The pushl in CTOR initialization interferes with frame pointer elimination. CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer -CRTSTUFF_T_CFLAGS_S = -mcoff -fno-omit-frame-pointer +CRTSTUFF_T_CFLAGS_S = -fno-omit-frame-pointer # # I am still a little unsure of the multilib architecture. The following diff --git a/gcc/config/i386/xm-dgux.h b/gcc/config/i386/xm-dgux.h deleted file mode 100644 index 881c5c7be9d..00000000000 --- a/gcc/config/i386/xm-dgux.h +++ /dev/null @@ -1,4 +0,0 @@ -/* Configuration for GCC for Intel i386 running DG/ux */ - -/* looks just like sysv4 for now */ -#include "xm-svr4.h" diff --git a/gcc/config/i386/xm-sun.h b/gcc/config/i386/xm-sun.h deleted file mode 100644 index 6c0f0a25630..00000000000 --- a/gcc/config/i386/xm-sun.h +++ /dev/null @@ -1,21 +0,0 @@ -/* Configuration for GNU C-compiler for Intel 80386 running SunOS 4.0. - Copyright (C) 1988, 1997 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#define USG diff --git a/gcc/config/i386/xm-sysv3.h b/gcc/config/i386/xm-sysv3.h deleted file mode 100644 index 9a655443ff5..00000000000 --- a/gcc/config/i386/xm-sysv3.h +++ /dev/null @@ -1,3 +0,0 @@ -/* Configuration for GCC for Intel i386 running System V Release 3. */ - -#include "xm-svr3.h" diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 4136e901795..43a05c1a6ee 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A) return __builtin_ia32_cvtss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Convert the lower SPFP value to a 32-bit integer according to the current + rounding mode. */ +static __inline long long +_mm_cvtss_si64x (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} +#endif + /* Convert the two lower SPFP values to 32-bit integers according to the current rounding mode. Return the integers in packed form. */ static __inline __m64 @@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A) return __builtin_ia32_cvttss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Truncate the lower SPFP value to a 32-bit integer. */ +static __inline long long +_mm_cvttss_si64x (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); +} +#endif + /* Truncate the two lower SPFP values to 32-bit integers. Return the integers in packed form. */ static __inline __m64 @@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B) return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); } +#ifdef __x86_64__ +/* Convert B to a SPFP value and insert it as element zero in A. */ +static __inline __m128 +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} +#endif + /* Convert the two 32-bit values in B to SPFP form and insert them as the two lower elements in A. */ static __inline __m128 @@ -1586,13 +1614,13 @@ _mm_ucomineq_sd (__m128d __A, __m128d __B) static __inline __m128i _mm_load_si128 (__m128i const *__P) { - return (__m128i) __builtin_ia32_loaddqa (__P); + return (__m128i) __builtin_ia32_loaddqa ((char const *)__P); } static __inline __m128i _mm_loadu_si128 (__m128i const *__P) { - return (__m128i) __builtin_ia32_loaddqu (__P); + return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); } static __inline __m128i @@ -1604,13 +1632,13 @@ _mm_loadl_epi64 (__m128i const *__P) static __inline void _mm_store_si128 (__m128i *__P, __m128i __B) { - __builtin_ia32_storedqa (__P, (__v16qi)__B); + __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B); } static __inline void _mm_storeu_si128 (__m128i *__P, __m128i __B) { - __builtin_ia32_storedqu (__P, (__v16qi)__B); + __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); } static __inline void @@ -1619,6 +1647,12 @@ _mm_storel_epi64 (__m128i *__P, __m128i __B) *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B); } +static __inline __m64 +_mm_movepi64_pi64 (__m128i __B) +{ + return (__m64) __builtin_ia32_movdq2q ((__v2di)__B); +} + static __inline __m128i _mm_move_epi64 (__m128i __A) { @@ -1656,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W) return __u.__v; } + +#ifdef __x86_64__ +/* Create the vector [Z Y]. */ +static __inline __m128i +_mm_set_epi64x (long long __Z, long long __Y) +{ + union { + long __a[2]; + __m128i __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} +#endif + /* Create the vector [S T U V Z Y X W]. */ static __inline __m128i _mm_set_epi16 (short __Z, short __Y, short __X, short __W, @@ -1724,6 +1776,15 @@ _mm_set1_epi32 (int __A) return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); } +#ifdef __x86_64__ +static __inline __m128i +_mm_set1_epi64x (long long __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0)); +} +#endif + static __inline __m128i _mm_set1_epi16 (short __A) { @@ -1893,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A) return __builtin_ia32_cvtsd2si ((__v2df) __A); } +#ifdef __x86_64__ +static __inline long long +_mm_cvtsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} +#endif + static __inline int _mm_cvttsd_si32 (__m128d __A) { return __builtin_ia32_cvttsd2si ((__v2df) __A); } +#ifdef __x86_64__ +static __inline long long +_mm_cvttsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} +#endif + static __inline __m128 _mm_cvtsd_ss (__m128 __A, __m128d __B) { @@ -1911,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B) return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); } +#ifdef __x86_64__ +static __inline __m128d +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} +#endif + static __inline __m128d _mm_cvtss_sd (__m128d __A, __m128 __B) { @@ -2048,7 +2133,7 @@ _mm_add_epi32 (__m128i __A, __m128i __B) static __inline __m128i _mm_add_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_paddq128 ((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B); } static __inline __m128i @@ -2096,7 +2181,7 @@ _mm_sub_epi32 (__m128i __A, __m128i __B) static __inline __m128i _mm_sub_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_psubq128 ((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B); } static __inline __m128i @@ -2142,7 +2227,7 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B) } static __inline __m64 -_mm_mul_pu16 (__m64 __A, __m64 __B) +_mm_mul_su32 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); } @@ -2459,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A) return (__m128i) __builtin_ia32_loadd (&__A); } +#ifdef __x86_64__ +static __inline __m128i +_mm_cvtsi64x_si128 (long long __A) +{ + return (__m128i) __builtin_ia32_movq2dq (__A); +} +#endif + static __inline int _mm_cvtsi128_si32 (__m128i __A) { @@ -2467,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A) return __tmp; } +#ifdef __x86_64__ +static __inline long long +_mm_cvtsi128_si64x (__m128i __A) +{ + return __builtin_ia32_movdq2q ((__v2di)__A); +} +#endif + #endif /* __SSE2__ */ #endif /* __SSE__ */ |