diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 362 |
1 files changed, 300 insertions, 62 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 42f6d93d3c3..0f8c8e4c3fc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IA-32. Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002 Free Software Foundation, Inc. + 2002, 2003 Free Software Foundation, Inc. This file is part of GNU CC. @@ -799,6 +799,7 @@ const struct attribute_spec ix86_attribute_table[]; static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *)); static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *)); static int ix86_value_regno PARAMS ((enum machine_mode)); +static bool contains_128bit_aligned_vector_p PARAMS ((tree)); #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); @@ -911,6 +912,12 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class, struct gcc_target targetm = TARGET_INITIALIZER; +/* The svr4 ABI for the i386 says that records and unions are returned + in memory. */ +#ifndef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 1 +#endif + /* Sometimes certain combinations of command options do not make sense on a particular target machine. You can define a macro `OVERRIDE_OPTIONS' to take account of this. This macro, if @@ -1021,7 +1028,7 @@ override_options () if (flag_asynchronous_unwind_tables == 2) flag_asynchronous_unwind_tables = 0; if (flag_pcc_struct_return == 2) - flag_pcc_struct_return = 1; + flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; } #ifdef SUBTARGET_OVERRIDE_OPTIONS @@ -2252,6 +2259,9 @@ function_arg (cum, mode, type, named) break; case BLKmode: + if (bytes < 0) + break; + /* FALLTHRU */ case DImode: case SImode: case HImode: @@ -2282,6 +2292,90 @@ function_arg (cum, mode, type, named) return ret; } +/* Return true when TYPE should be 128bit aligned for 32bit argument passing + ABI */ +static bool +contains_128bit_aligned_vector_p (type) + tree type; +{ + enum machine_mode mode = TYPE_MODE (type); + if (SSE_REG_MODE_P (mode) + && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) + return true; + if (TYPE_ALIGN (type) < 128) + return false; + + if (AGGREGATE_TYPE_P (type)) + { + /* Walk the agregates recursivly. */ + if (TREE_CODE (type) == RECORD_TYPE + || TREE_CODE (type) == UNION_TYPE + || TREE_CODE (type) == QUAL_UNION_TYPE) + { + tree field; + + if (TYPE_BINFO (type) != NULL + && TYPE_BINFO_BASETYPES (type) != NULL) + { + tree bases = TYPE_BINFO_BASETYPES (type); + int n_bases = TREE_VEC_LENGTH (bases); + int i; + + for (i = 0; i < n_bases; ++i) + { + tree binfo = TREE_VEC_ELT (bases, i); + tree type = BINFO_TYPE (binfo); + + if (contains_128bit_aligned_vector_p (type)) + return true; + } + } + /* And now merge the fields of structure. */ + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) == FIELD_DECL + && contains_128bit_aligned_vector_p (TREE_TYPE (field))) + return true; + } + } + /* Just for use if some languages passes arrays by value. */ + else if (TREE_CODE (type) == ARRAY_TYPE) + { + if (contains_128bit_aligned_vector_p (TREE_TYPE (type))) + return true; + } + else + abort (); + } + return false; +} + +/* A C expression that indicates when an argument must be passed by + reference. If nonzero for an argument, a copy of that argument is + made in memory and a pointer to the argument is passed instead of + the argument itself. The pointer is passed in whatever way is + appropriate for passing a pointer to that type. */ + +int +function_arg_pass_by_reference (cum, mode, type, named) + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; + enum machine_mode mode ATTRIBUTE_UNUSED; + tree type; + int named ATTRIBUTE_UNUSED; +{ + if (!TARGET_64BIT) + return 0; + + if (type && int_size_in_bytes (type) == -1) + { + if (TARGET_DEBUG_ARG) + fprintf (stderr, "function_arg_pass_by_reference\n"); + return 1; + } + + return 0; +} + /* Gives the alignment boundary, in bits, of an argument with the specified mode and type. */ @@ -2291,14 +2385,34 @@ ix86_function_arg_boundary (mode, type) tree type; { int align; - if (!TARGET_64BIT) - return PARM_BOUNDARY; if (type) align = TYPE_ALIGN (type); else align = GET_MODE_ALIGNMENT (mode); if (align < PARM_BOUNDARY) align = PARM_BOUNDARY; + if (!TARGET_64BIT) + { + /* i386 ABI defines all arguments to be 4 byte aligned. We have to + make an exception for SSE modes since these require 128bit + alignment. + + The handling here differs from field_alignment. ICC aligns MMX + arguments to 4 byte boundaries, while structure fields are aligned + to 8 byte boundaries. */ + if (!type) + { + if (!SSE_REG_MODE_P (mode)) + align = PARM_BOUNDARY; + } + else + { + if (!contains_128bit_aligned_vector_p (type)) + align = PARM_BOUNDARY; + } + if (align != PARM_BOUNDARY && !TARGET_SSE) + abort(); + } if (align > 128) align = 128; return align; @@ -2488,6 +2602,8 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) /* Indicate to allocate space on the stack for varargs save area. */ ix86_save_varrargs_registers = 1; + cfun->stack_alignment_needed = 128; + fntype = TREE_TYPE (current_function_decl); stdarg_p = (TYPE_ARG_TYPES (fntype) != 0 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype))) @@ -2637,6 +2753,7 @@ ix86_va_arg (valist, type) rtx lab_false, lab_over = NULL_RTX; rtx addr_rtx, r; rtx container; + int indirect_p = 0; /* Only 64bit target needs something special. */ if (!TARGET_64BIT) @@ -2656,6 +2773,13 @@ ix86_va_arg (valist, type) sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav); size = int_size_in_bytes (type); + if (size == -1) + { + /* Passed by reference. */ + indirect_p = 1; + type = build_pointer_type (type); + size = int_size_in_bytes (type); + } rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; container = construct_container (TYPE_MODE (type), type, 0, @@ -2760,10 +2884,12 @@ ix86_va_arg (valist, type) { int i; rtx mem; + rtx x; /* Never use the memory itself, as it has the alias set. */ - addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0); - mem = gen_rtx_MEM (BLKmode, addr_rtx); + x = XEXP (assign_temp (type, 0, 1, 0), 0); + mem = gen_rtx_MEM (BLKmode, x); + force_operand (x, addr_rtx); set_mem_alias_set (mem, get_varargs_alias_set ()); set_mem_align (mem, BITS_PER_UNIT); @@ -2846,6 +2972,13 @@ ix86_va_arg (valist, type) if (container) emit_label (lab_over); + if (indirect_p) + { + r = gen_rtx_MEM (Pmode, addr_rtx); + set_mem_alias_set (r, get_varargs_alias_set ()); + emit_move_insn (addr_rtx, r); + } + return addr_rtx; } @@ -3401,6 +3534,19 @@ non_q_regs_operand (op, mode) return NON_QI_REG_P (op); } +/* Return 1 when OP is operand acceptable for standard SSE move. */ +int +vector_move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (nonimmediate_operand (op, mode)) + return 1; + if (GET_MODE (op) != mode && mode != VOIDmode) + return 0; + return (op == CONST0_RTX (GET_MODE (op))); +} + /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */ int @@ -4225,7 +4371,8 @@ ix86_save_reg (regno, maybe_eh_return) && regno == REAL_PIC_OFFSET_TABLE_REGNUM && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM] || current_function_profile - || current_function_calls_eh_return)) + || current_function_calls_eh_return + || current_function_uses_const_pool)) { if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) return 0; @@ -4516,6 +4663,10 @@ ix86_expand_prologue () CALL_INSN_FUNCTION_USAGE (insn) = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0), CALL_INSN_FUNCTION_USAGE (insn)); + + /* Don't allow scheduling pass to move insns across __alloca + call. */ + emit_insn (gen_blockage (const0_rtx)); } if (use_mov) { @@ -7484,12 +7635,12 @@ output_fp_compare (insn, operands, eflags_p, unordered_p) if (unordered_p) return "ucomiss\t{%1, %0|%0, %1}"; else - return "comiss\t{%1, %0|%0, %y}"; + return "comiss\t{%1, %0|%0, %1}"; else if (unordered_p) return "ucomisd\t{%1, %0|%0, %1}"; else - return "comisd\t{%1, %0|%0, %y}"; + return "comisd\t{%1, %0|%0, %1}"; } if (! STACK_TOP_P (cmp_op0)) @@ -7777,9 +7928,17 @@ ix86_expand_move (mode, operands) if (strict) ; - else if (GET_CODE (op1) == CONST_DOUBLE - && register_operand (op0, mode)) - op1 = validize_mem (force_const_mem (mode, op1)); + else if (GET_CODE (op1) == CONST_DOUBLE) + { + op1 = validize_mem (force_const_mem (mode, op1)); + if (!register_operand (op0, mode)) + { + rtx temp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); + emit_move_insn (op0, temp); + return; + } + } } } @@ -7799,8 +7958,12 @@ ix86_expand_vector_move (mode, operands) to handle some of them more efficiently. */ if ((reload_in_progress | reload_completed) == 0 && register_operand (operands[0], mode) - && CONSTANT_P (operands[1])) - operands[1] = force_const_mem (mode, operands[1]); + && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode)) + { + operands[1] = force_const_mem (mode, operands[1]); + emit_move_insn (operands[0], operands[1]); + return; + } /* Make operand1 a register if it isn't already. */ if (!no_new_pseudos @@ -9219,11 +9382,11 @@ ix86_expand_int_movcc (operands) /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics done in proper mode to match. */ if (diff == 1) - tmp = out; + tmp = copy_rtx (out); else { rtx out1; - out1 = out; + out1 = copy_rtx (out); tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); nops++; if (diff & 1) @@ -9241,9 +9404,9 @@ ix86_expand_int_movcc (operands) && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out)) { if (nops == 1) - out = force_operand (tmp, out); + out = force_operand (tmp, copy_rtx (out)); else - emit_insn (gen_rtx_SET (VOIDmode, out, tmp)); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); } if (out != operands[0]) emit_move_insn (operands[0], copy_rtx (out)); @@ -9822,15 +9985,24 @@ ix86_split_long_move (operands) Do an lea to the last part and use only one colliding move. */ else if (collisions > 1) { + rtx base; + collisions = 1; - emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1], - XEXP (part[1][0], 0))); - part[1][0] = change_address (part[1][0], - TARGET_64BIT ? DImode : SImode, - part[0][nparts - 1]); - part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD); + + base = part[0][nparts - 1]; + + /* Handle the case when the last part isn't valid for lea. + Happens in 64-bit mode storing the 12-byte XFmode. */ + if (GET_MODE (base) != Pmode) + base = gen_rtx_REG (Pmode, REGNO (base)); + + emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); + part[1][0] = replace_equiv_address (part[1][0], base); + part[1][1] = replace_equiv_address (part[1][1], + plus_constant (base, UNITS_PER_WORD)); if (nparts == 3) - part[1][2] = adjust_address (part[1][0], VOIDmode, 8); + part[1][2] = replace_equiv_address (part[1][2], + plus_constant (base, 8)); } } @@ -10973,7 +11145,8 @@ memory_address_length (addr) if (disp) { if (GET_CODE (disp) == CONST_INT - && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')) + && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K') + && base) len = 1; else len = 4; @@ -11036,6 +11209,26 @@ ix86_attr_length_address_default (insn) rtx insn; { int i; + + if (get_attr_type (insn) == TYPE_LEA) + { + rtx set = PATTERN (insn); + if (GET_CODE (set) == SET) + ; + else if (GET_CODE (set) == PARALLEL + && GET_CODE (XVECEXP (set, 0, 0)) == SET) + set = XVECEXP (set, 0, 0); + else + { +#ifdef ENABLE_CHECKING + abort (); +#endif + return 0; + } + + return memory_address_length (SET_SRC (set)); + } + extract_insn_cached (insn); for (i = recog_data.n_operands - 1; i >= 0; --i) if (GET_CODE (recog_data.operand[i]) == MEM) @@ -11834,7 +12027,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) #define def_builtin(MASK, NAME, TYPE, CODE) \ do { \ - if ((MASK) & target_flags) \ + if ((MASK) & target_flags \ + && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ NULL, NULL_TREE); \ } while (0) @@ -11851,6 +12045,8 @@ struct builtin_description /* Used for builtins that are enabled both by -msse and -msse2. */ #define MASK_SSE1 (MASK_SSE | MASK_SSE2) +#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) +#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) static const struct builtin_description bdesc_comi[] = { @@ -11933,9 +12129,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 }, { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 }, { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 }, { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 }, { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 }, { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 }, @@ -11984,6 +12182,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12056,11 +12255,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 }, { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 }, { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 }, - { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, + { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 }, { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 }, @@ -12134,6 +12333,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } }; @@ -12149,8 +12349,10 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, @@ -12172,6 +12374,8 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, @@ -12197,7 +12401,11 @@ ix86_init_mmx_sse_builtins () size_t i; tree pchar_type_node = build_pointer_type (char_type_node); + tree pcchar_type_node = build_pointer_type ( + build_type_variant (char_type_node, 1, 0)); tree pfloat_type_node = build_pointer_type (float_type_node); + tree pcfloat_type_node = build_pointer_type ( + build_type_variant (float_type_node, 1, 0)); tree pv2si_type_node = build_pointer_type (V2SI_type_node); tree pv2di_type_node = build_pointer_type (V2DI_type_node); tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); @@ -12213,11 +12421,18 @@ ix86_init_mmx_sse_builtins () tree int_ftype_v4sf = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE); + tree int64_ftype_v4sf + = build_function_type_list (long_long_integer_type_node, + V4SF_type_node, NULL_TREE); tree int_ftype_v8qi = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf_int = build_function_type_list (V4SF_type_node, V4SF_type_node, integer_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int64 + = build_function_type_list (V4SF_type_node, + V4SF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2si = build_function_type_list (V4SF_type_node, V4SF_type_node, V2SI_type_node, NULL_TREE); @@ -12270,8 +12485,8 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (void_type_node, V8QI_type_node, V8QI_type_node, pchar_type_node, NULL_TREE); - tree v4sf_ftype_pfloat - = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE); + tree v4sf_ftype_pcfloat + = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); /* @@@ the type is bogus */ tree v4sf_ftype_v4sf_pv2si = build_function_type_list (V4SF_type_node, @@ -12326,7 +12541,11 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (V2SI_type_node, V2SF_type_node, V2SF_type_node, NULL_TREE); tree pint_type_node = build_pointer_type (integer_type_node); + tree pcint_type_node = build_pointer_type ( + build_type_variant (integer_type_node, 1, 0)); tree pdouble_type_node = build_pointer_type (double_type_node); + tree pcdouble_type_node = build_pointer_type ( + build_type_variant (double_type_node, 1, 0)); tree int_ftype_v2df_v2df = build_function_type_list (integer_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); @@ -12338,8 +12557,8 @@ ix86_init_mmx_sse_builtins () tree ti_ftype_ti_ti = build_function_type_list (intTI_type_node, intTI_type_node, intTI_type_node, NULL_TREE); - tree void_ftype_pvoid - = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + tree void_ftype_pcvoid + = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); tree v2di_ftype_di = build_function_type_list (V2DI_type_node, long_long_unsigned_type_node, NULL_TREE); @@ -12364,9 +12583,16 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); tree int_ftype_v2df = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); + tree int64_ftype_v2df + = build_function_type_list (long_long_integer_type_node, + V2DF_type_node, NULL_TREE); tree v2df_ftype_v2df_int = build_function_type_list (V2DF_type_node, V2DF_type_node, integer_type_node, NULL_TREE); + tree v2df_ftype_v2df_int64 + = build_function_type_list (V2DF_type_node, + V2DF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2df = build_function_type_list (V4SF_type_node, V4SF_type_node, V2DF_type_node, NULL_TREE); @@ -12394,8 +12620,8 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (void_type_node, V16QI_type_node, V16QI_type_node, pchar_type_node, NULL_TREE); - tree v2df_ftype_pdouble - = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE); + tree v2df_ftype_pcdouble + = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); tree v2df_ftype_v2df_v2df = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DF_type_node, NULL_TREE); @@ -12454,16 +12680,16 @@ ix86_init_mmx_sse_builtins () V16QI_type_node, V16QI_type_node, NULL_TREE); tree int_ftype_v16qi = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); - tree v16qi_ftype_pchar - = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE); + tree v16qi_ftype_pcchar + = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); tree void_ftype_pchar_v16qi = build_function_type_list (void_type_node, pchar_type_node, V16QI_type_node, NULL_TREE); - tree v4si_ftype_pchar - = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE); - tree void_ftype_pchar_v4si + tree v4si_ftype_pcint + = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); + tree void_ftype_pcint_v4si = build_function_type_list (void_type_node, - pchar_type_node, V4SI_type_node, NULL_TREE); + pcint_type_node, V4SI_type_node, NULL_TREE); tree v2di_ftype_v2di = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); @@ -12539,8 +12765,6 @@ ix86_init_mmx_sse_builtins () /* Add the remaining MMX insns with somewhat more complicated types. */ def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); - def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); - def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW); def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD); def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ); @@ -12566,21 +12790,26 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW); def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB); + def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); + def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); - def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS); - def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS); + def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS); def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS); def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS); def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS); @@ -12648,9 +12877,9 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q); - def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD); def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD); def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); @@ -12689,33 +12918,36 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1); def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD); def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1); - def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1); + def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD); def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1); def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD); - def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH); + def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE); def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); - def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA); - def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU); - def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD); + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA); + def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU); + def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD); def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA); def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU); - def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED); + def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED); def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ); def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); @@ -12795,6 +13027,13 @@ ix86_expand_binop_builtin (icode, arglist, target) || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); + if (GET_MODE (op1) == SImode && mode1 == TImode) + { + rtx x = gen_reg_rtx (V4SImode); + emit_insn (gen_sse2_loadd (x, op1)); + op1 = gen_lowpart (TImode, x); + } + /* In case the insn wants input operands in modes different from the result, abort. */ if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1) @@ -12837,9 +13076,7 @@ ix86_expand_store_builtin (icode, arglist) op1 = safe_vector_operand (op1, mode1); op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - - if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); + op1 = copy_to_mode_reg (mode1, op1); pat = GEN_FCN (icode) (op0, op1); if (pat) @@ -13828,9 +14065,10 @@ ix86_hard_regno_mode_ok (regno, mode) if (FP_REGNO_P (regno)) return VALID_FP_MODE_P (mode); if (SSE_REGNO_P (regno)) - return VALID_SSE_REG_MODE (mode); + return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0); if (MMX_REGNO_P (regno)) - return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode); + return (TARGET_MMX + ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0); /* We handle both integer and floats in the general purpose registers. In future we should be able to handle vector modes as well. */ if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) @@ -14299,7 +14537,7 @@ x86_function_profiler (file, labelno) else { #ifndef NO_PROFILE_COUNTERS - fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno, + fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, PROFILE_COUNT_REGISTER); #endif fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); |