diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 681 |
1 files changed, 590 insertions, 91 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index bc680c80df9..ec08478ba6e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -48,6 +48,13 @@ Boston, MA 02111-1307, USA. */ #include "langhooks.h" #include "cgraph.h" #include "tree-gimple.h" +/* APPLE LOCAL inline intrinsics with -Os 4037817 */ +#include "params.h" + +/* APPLE LOCAL begin pascal strings */ +#include "../../libcpp/internal.h" +extern struct cpp_reader* parse_in; +/* APPLE LOCAL end pascal strings */ #ifndef CHECK_STACK_LIMIT #define CHECK_STACK_LIMIT (-1) @@ -505,6 +512,34 @@ struct processor_costs nocona_cost = { const struct processor_costs *ix86_cost = &pentium_cost; +/* APPLE LOCAL begin Altivec */ +/* vector types */ +static GTY(()) tree unsigned_V16QI_type_node; +static GTY(()) tree unsigned_V4SI_type_node; +static GTY(()) tree unsigned_V8QI_type_node; +static GTY(()) tree unsigned_V8HI_type_node; +static GTY(()) tree unsigned_V4HI_type_node; +static GTY(()) tree unsigned_V2HI_type_node; +static GTY(()) tree unsigned_V2SI_type_node; +static GTY(()) tree unsigned_V2DI_type_node; +static GTY(()) tree unsigned_V1DI_type_node; + +static GTY(()) tree V16QI_type_node; +static GTY(()) tree V4SF_type_node; +static GTY(()) tree V4SI_type_node; +static GTY(()) tree V8QI_type_node; +static GTY(()) tree V8HI_type_node; +static GTY(()) tree V4HI_type_node; +static GTY(()) tree V2HI_type_node; +static GTY(()) tree V2SI_type_node; +static GTY(()) tree V2SF_type_node; +static GTY(()) tree V2DI_type_node; +static GTY(()) tree V2DF_type_node; +static GTY(()) tree V16SF_type_node; +static GTY(()) tree V1DI_type_node; +static GTY(()) tree V4DF_type_node; +/* APPLE LOCAL end Altivec */ + /* Processor feature/optimization bitmasks. */ #define m_386 (1<<PROCESSOR_I386) #define m_486 (1<<PROCESSOR_I486) @@ -525,6 +560,8 @@ const int x86_double_with_add = ~m_386; const int x86_use_bit_test = m_386; const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6; const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA; +/* APPLE LOCAL mainline 2005-03-16 4054919 */ +const int x86_fisttp = m_NOCONA; const int x86_3dnow_a = m_ATHLON_K8; const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA; /* Branch hints were put in P4 based on simulation result. But @@ -919,6 +956,11 @@ static rtx ix86_struct_value_rtx (tree, int); static bool ix86_ms_bitfield_layout_p (tree); static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *); static int extended_reg_mentioned_1 (rtx *, void *); +/* APPLE LOCAL begin why is this local? */ +#if TARGET_MACHO +static bool ix86_binds_local_p (tree); +#endif +/* APPLE LOCAL end why is this local? */ static bool ix86_rtx_costs (rtx, int, int, int *); static int min_insn_size (rtx); static tree ix86_md_asm_clobbers (tree clobbers); @@ -1030,6 +1072,13 @@ static void init_ext_80387_constants (void); #undef TARGET_MS_BITFIELD_LAYOUT_P #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p +/* APPLE LOCAL begin why is this local? */ +#if TARGET_MACHO +#undef TARGET_BINDS_LOCAL_P +#define TARGET_BINDS_LOCAL_P ix86_binds_local_p +#endif +/* APPLE LOCAL end why is this local? */ + #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK @@ -1043,6 +1092,12 @@ static void init_ext_80387_constants (void); #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST ix86_address_cost +/* APPLE LOCAL begin SSE stack alignment */ +#ifndef BASIC_STACK_BOUNDARY +#define BASIC_STACK_BOUNDARY (32) +#endif +/* APPLE LOCAL end SSE stack alignment */ + #undef TARGET_FIXED_CONDITION_CODE_REGS #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs #undef TARGET_CC_MODES_COMPATIBLE @@ -1203,6 +1258,12 @@ override_options (void) SUBTARGET_OVERRIDE_OPTIONS; #endif + /* APPLE LOCAL begin constant cfstrings */ +#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS + SUBSUBTARGET_OVERRIDE_OPTIONS; +#endif + /* APPLE LOCAL end constant cfstrings */ + /* Set the default values for switches whose default depends on TARGET_64BIT in case they weren't overwritten by command line options. */ if (TARGET_64BIT) @@ -1426,9 +1487,11 @@ override_options (void) The default of 128 bits is for Pentium III's SSE __m128, but we don't want additional code to keep the stack aligned when optimizing for code size. */ + /* APPLE LOCAL begin SSE stack alignment */ ix86_preferred_stack_boundary = (optimize_size - ? TARGET_64BIT ? 128 : 32 + ? TARGET_64BIT ? 128 : BASIC_STACK_BOUNDARY : 128); + /* APPLE LOCAL end SSE stack alignment */ if (ix86_preferred_stack_boundary_string) { i = atoi (ix86_preferred_stack_boundary_string); @@ -1514,19 +1577,19 @@ override_options (void) target_flags |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE) & ~target_flags_explicit); - - if (TARGET_SSE) - ix86_fpmath = FPMATH_SSE; +/* APPLE LOCAL begin mainline 2005-04-11 4010614 */ } else { - ix86_fpmath = FPMATH_387; /* i386 ABI does not specify red zone. It still makes sense to use it when programmer takes care to stack from being destroyed. */ if (!(target_flags_explicit & MASK_NO_RED_ZONE)) target_flags |= MASK_NO_RED_ZONE; } + ix86_fpmath = TARGET_FPMATH_DEFAULT; +/* APPLE LOCAL end mainline 2005-04-11 4010614 */ + if (ix86_fpmath_string != 0) { if (! strcmp (ix86_fpmath_string, "387")) @@ -1583,11 +1646,41 @@ override_options (void) so it won't slow down the compilation and make x87 code slower. */ if (!TARGET_SCHEDULE) flag_schedule_insns_after_reload = flag_schedule_insns = 0; + + /* APPLE LOCAL begin dynamic-no-pic */ +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P) + { + if (flag_pic) + warning ("-mdynamic-no-pic overrides -fpic or -fPIC"); + flag_pic = 0; + } + else +#endif + if (flag_pic == 1) + { + /* Darwin doesn't support -fpic. */ + warning ("-fpic is not supported; -fPIC assumed"); + flag_pic = 2; + } + /* APPLE LOCAL end dynamic-no-pic */ } void optimization_options (int level, int size ATTRIBUTE_UNUSED) { + /* APPLE LOCAL begin disable strict aliasing; breaks too much existing code. */ +#if TARGET_MACHO + flag_strict_aliasing = 0; + /* APPLE LOCAL begin inline intrinsics with -Os 4037817 */ + if (optimize_size) + { + set_param_value ("max-inline-insns-single", 30); + set_param_value ("max-inline-insns-auto", 30); + } + /* APPLE LOCAL end inline intrinsics with -Os 4037817 */ +#endif + /* APPLE LOCAL end disable strict aliasing; breaks too much existing code. */ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to make the problem with not enough registers even worse. */ #ifdef INSN_SCHEDULING @@ -1595,6 +1688,19 @@ optimization_options (int level, int size ATTRIBUTE_UNUSED) flag_schedule_insns = 0; #endif + /* APPLE LOCAL begin pragma fenv */ + /* Trapping math is not needed by many users, and is expensive. + C99 permits us to default it off and we do that. It is + turned on when <fenv.h> is included (see darwin_pragma_fenv + in darwin-c.c). */ + flag_trapping_math = 0; + /* APPLE LOCAL end pragma fenv */ + + /* APPLE LOCAL begin radar 4094534. */ + /* The Darwin libraries never set errno, so we might as well + avoid calling them when that's the only reason we would. */ + flag_errno_math = 0; + /* APPLE LOCAL end radar 4094534. */ /* The default values of these switches depend on the TARGET_64BIT that is not known at this moment. Mark these values with 2 and let user the to override these. In case there is no command line option @@ -1607,6 +1713,43 @@ optimization_options (int level, int size ATTRIBUTE_UNUSED) SUBTARGET_OPTIMIZATION_OPTIONS; #endif } + +/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */ +/* Version of the above for use from #pragma optimization_level. Only + per-function flags are reset. */ + +void +reset_optimization_options (int level, int size ATTRIBUTE_UNUSED) +{ + /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to + make the problem with not enough registers even worse. */ +#ifdef INSN_SCHEDULING + if (level > 1) + flag_schedule_insns = 0; +#endif + + /* APPLE LOCAL begin pragma fenv */ + /* Trapping math is not needed by many users, and is expensive. + C99 permits us to default it off and we do that. It is + turned on when <fenv.h> is included (see darwin_pragma_fenv + in darwin-c.c). */ + flag_trapping_math = 0; + /* APPLE LOCAL end pragma fenv */ + + /* The default values of these switches depend on TARGET_64BIT + which was set earlier and not reset. */ + if (optimize >= 1) + { + if (TARGET_64BIT) + flag_omit_frame_pointer = 1; + else + flag_omit_frame_pointer = 0; + } +#ifdef SUBTARGET_OPTIMIZATION_OPTIONS + SUBTARGET_OPTIMIZATION_OPTIONS; +#endif +} +/* APPLE LOCAL end optimization pragmas 3124235/3420242 */ /* Table of valid machine attributes. */ const struct attribute_spec ix86_attribute_table[] = @@ -1643,11 +1786,14 @@ const struct attribute_spec ix86_attribute_table[] = static bool ix86_function_ok_for_sibcall (tree decl, tree exp) { + /* APPLE LOCAL begin indirect sibcall 4087330 */ /* If we are generating position-independent code, we cannot sibcall optimize any indirect call, or a direct call to a global function, - as the PLT requires %ebx be live. */ - if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl))) + as the PLT requires %ebx be live. (Darwin does not have a PLT.) */ + if (!TARGET_MACHO && !TARGET_64BIT && flag_pic + && (!decl || TREE_PUBLIC (decl))) return false; + /* APPLE LOCAL end indirect sibcall 4087330 */ /* If we are returning floats on the 80387 register stack, we cannot make a sibcall from a function that doesn't return a float to a @@ -2657,6 +2803,11 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode, abort (); } } + + /* Empty aligned struct, union or class. */ + if (nexps == 0) + return NULL; + ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); for (i = 0; i < nexps; i++) XVECEXP (ret, 0, i) = exp [i]; @@ -3877,7 +4028,8 @@ static int pic_labels_used; static void get_pc_thunk_name (char name[32], unsigned int regno) { - if (USE_HIDDEN_LINKONCE) + /* APPLE LOCAL deep branch prediction pic-base. */ + if (USE_HIDDEN_LINKONCE || TARGET_MACHO) sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); else ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); @@ -3921,6 +4073,18 @@ ix86_file_end (void) fputc ('\n', asm_out_file); ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); } + /* APPLE LOCAL begin deep branch prediction pic-base */ + else if (TARGET_MACHO) + { + darwin_textcoal_nt_section (); + fputs (".weak_definition\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n.private_extern\t", asm_out_file); + assemble_name (asm_out_file, name); + fputs ("\n", asm_out_file); + ASM_OUTPUT_LABEL (asm_out_file, name); + } + /* APPLE LOCAL end deep branch prediction pic-base */ else { text_section (); @@ -3976,12 +4140,24 @@ output_set_got (rtx dest) xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); xops[2] = gen_rtx_MEM (QImode, xops[2]); output_asm_insn ("call\t%X2", xops); + /* APPLE LOCAL begin deep branch prediction pic-base */ +#if TARGET_MACHO + /* Output the "canonical" label name ("Lxx$pb") here too. This + is what will be referred to by the Mach-O PIC subsystem. */ + if (cfun) + ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ()); +#endif + /* APPLE LOCAL end deep branch prediction pic-base */ } + /* APPLE LOCAL begin deep branch prediction pic-base */ +#if !TARGET_MACHO if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops); else if (!TARGET_MACHO) output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops); +#endif /* !TARGET_MACHO */ + /* APPLE LOCAL end deep branch prediction pic-base */ return ""; } @@ -4324,6 +4500,12 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) RTX_FRAME_RELATED_P (insn) = 1; } +/* APPLE LOCAL begin fix-and-continue x86 */ +#ifndef TARGET_FIX_AND_CONTINUE +#define TARGET_FIX_AND_CONTINUE 0 +#endif +/* APPLE LOCAL end fix-and-continue x86 */ + /* Expand the prologue into a bunch of separate insns. */ void @@ -4334,6 +4516,23 @@ ix86_expand_prologue (void) struct ix86_frame frame; HOST_WIDE_INT allocate; + /* APPLE LOCAL begin fix-and-continue x86 */ + if (TARGET_FIX_AND_CONTINUE) + { + /* gdb on darwin arranges to forward a function from the old + address by modifying the first 6 instructions of the function + to branch to the overriding function. This is necessary to + permit function pointers that point to the old function to + actually forward to the new function. */ + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + } + /* APPLE LOCAL end fix-and-continue x86 */ + ix86_compute_frame_layout (&frame); /* Note: AT&T enter does NOT have reversed args. Enter is probably @@ -4651,6 +4850,24 @@ ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, { if (pic_offset_table_rtx) REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM; + /* APPLE LOCAL begin */ +#if TARGET_MACHO + /* Mach-O doesn't support labels at the end of objects, so if + it looks like we might want one, insert a NOP. */ + { + rtx insn = get_last_insn (); + while (insn + && NOTE_P (insn) + && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL) + insn = PREV_INSN (insn); + if (insn + && (LABEL_P (insn) + || (NOTE_P (insn) + && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))) + fputs ("\tnop\n", file); + } +#endif + /* APPLE LOCAL end */ } /* Extract the parts of an RTL expression that is a valid memory address @@ -4661,9 +4878,8 @@ ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, int ix86_decompose_address (rtx addr, struct ix86_address *out) { - rtx base = NULL_RTX; - rtx index = NULL_RTX; - rtx disp = NULL_RTX; + rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; + rtx base_reg, index_reg; HOST_WIDE_INT scale = 1; rtx scale_rtx = NULL_RTX; int retval = 1; @@ -4765,34 +4981,37 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) scale = INTVAL (scale_rtx); } + base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; + index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; + /* Allow arg pointer and stack pointer as index if there is not scaling. */ - if (base && index && scale == 1 - && (index == arg_pointer_rtx - || index == frame_pointer_rtx - || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM))) + if (base_reg && index_reg && scale == 1 + && (index_reg == arg_pointer_rtx + || index_reg == frame_pointer_rtx + || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) { - rtx tmp = base; - base = index; - index = tmp; + rtx tmp; + tmp = base, base = index, index = tmp; + tmp = base_reg, base_reg = index_reg, index_reg = tmp; } /* Special case: %ebp cannot be encoded as a base without a displacement. */ - if ((base == hard_frame_pointer_rtx - || base == frame_pointer_rtx - || base == arg_pointer_rtx) && !disp) + if ((base_reg == hard_frame_pointer_rtx + || base_reg == frame_pointer_rtx + || base_reg == arg_pointer_rtx) && !disp) disp = const0_rtx; /* Special case: on K6, [%esi] makes the instruction vector decoded. Avoid this by transforming to [%esi+0]. */ if (ix86_tune == PROCESSOR_K6 && !optimize_size - && base && !index && !disp - && REG_P (base) - && REGNO_REG_CLASS (REGNO (base)) == SIREG) + && base_reg && !index_reg && !disp + && REG_P (base_reg) + && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) disp = const0_rtx; /* Special case: encode reg+reg instead of reg*2. */ if (!base && index && scale && scale == 2) - base = index, scale = 1; + base = index, base_reg = index_reg, scale = 1; /* Special case: scaling cannot be encoded without base or displacement. */ if (!base && !disp && index && scale != 1) @@ -4821,6 +5040,11 @@ ix86_address_cost (rtx x) if (!ix86_decompose_address (x, &parts)) abort (); + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + /* More complex memory references are better. */ if (parts.disp && parts.disp != const0_rtx) cost--; @@ -4971,7 +5195,21 @@ legitimate_constant_p (rtx x) /* TLS symbols are never valid. */ if (tls_symbolic_operand (x, Pmode)) return false; + /* APPLE LOCAL begin dynamic-no-pic */ + if (TARGET_MACHO && TARGET_DYNAMIC_NO_PIC) + return machopic_symbol_defined_p (x); + break; + + case PLUS: + { + rtx left = XEXP (x, 0); + rtx right = XEXP (x, 1); + bool left_is_constant = legitimate_constant_p (left); + bool right_is_constant = legitimate_constant_p (right); + return left_is_constant && right_is_constant; + } break; + /* APPLE LOCAL end dynamic-no-pic */ default: break; @@ -5170,15 +5408,23 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) /* Validate base register. - Don't allow SUBREG's here, it can lead to spill failures when the base - is one word out of a two word structure, which is represented internally - as a DImode int. */ + Don't allow SUBREG's that span more than a word here. It can lead to spill + failures when the base is one word out of a two word structure, which is + represented internally as a DImode int. */ if (base) { + rtx reg; reason_rtx = base; - - if (GET_CODE (base) != REG) + + if (REG_P (base)) + reg = base; + else if (GET_CODE (base) == SUBREG + && REG_P (SUBREG_REG (base)) + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) + <= UNITS_PER_WORD) + reg = SUBREG_REG (base); + else { reason = "base is not a register"; goto report_error; @@ -5190,8 +5436,8 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) goto report_error; } - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) + if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) { reason = "base is not valid"; goto report_error; @@ -5200,15 +5446,21 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) /* Validate index register. - Don't allow SUBREG's here, it can lead to spill failures when the index - is one word out of a two word structure, which is represented internally - as a DImode int. */ + Don't allow SUBREG's that span more than a word here -- same as above. */ if (index) { + rtx reg; reason_rtx = index; - if (GET_CODE (index) != REG) + if (REG_P (index)) + reg = index; + else if (GET_CODE (index) == SUBREG + && REG_P (SUBREG_REG (index)) + && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) + <= UNITS_PER_WORD) + reg = SUBREG_REG (index); + else { reason = "index is not a register"; goto report_error; @@ -5220,8 +5472,8 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) goto report_error; } - if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index)) - || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index))) + if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) + || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) { reason = "index is not valid"; goto report_error; @@ -5273,7 +5525,8 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) goto report_error; } - else if (flag_pic && (SYMBOLIC_CONST (disp) + /* APPLE LOCAL dynamic-no-pic */ + else if (MACHOPIC_INDIRECT && (SYMBOLIC_CONST (disp) #if TARGET_MACHO && !machopic_operand_p (disp) #endif @@ -5294,11 +5547,18 @@ legitimate_address_p (enum machine_mode mode, rtx addr, int strict) goto report_error; } } - else if (! legitimate_pic_address_disp_p (disp)) + /* APPLE LOCAL begin dynamic-no-pic */ + else if (flag_pic && ! legitimate_pic_address_disp_p (disp)) { reason = "displacement is an invalid pic construct"; goto report_error; } + else if (TARGET_DYNAMIC_NO_PIC && !legitimate_constant_p (disp)) + { + reason = "displacment must be referenced via non_lazy_pointer"; + goto report_error; + } + /* APPLE LOCAL end dynamic-no-pic */ /* This code used to verify that a symbolic pic displacement includes the pic_offset_table_rtx register. @@ -5721,11 +5981,16 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) if (flag_pic && SYMBOLIC_CONST (x)) return legitimize_pic_address (x, 0); + /* APPLE LOCAL begin dynamic-no-pic */ + if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) + return machopic_indirect_data_reference (x, 0); + /* APPLE LOCAL end dynamic-no-pic */ /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ if (GET_CODE (x) == ASHIFT && GET_CODE (XEXP (x, 1)) == CONST_INT - && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4) + /* APPLE LOCAL 4101687 */ + && (log = INTVAL (XEXP (x, 1))) < 4) { changed = 1; x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), @@ -5738,7 +6003,8 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) if (GET_CODE (XEXP (x, 0)) == ASHIFT && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT - && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4) + /* APPLE LOCAL 4101687 */ + && (log = INTVAL (XEXP (XEXP (x, 0), 1))) < 4) { changed = 1; XEXP (x, 0) = gen_rtx_MULT (Pmode, @@ -5748,7 +6014,8 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) if (GET_CODE (XEXP (x, 1)) == ASHIFT && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT - && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4) + /* APPLE LOCAL 4101687 */ + && (log = INTVAL (XEXP (XEXP (x, 1), 1))) < 4) { changed = 1; XEXP (x, 1) = gen_rtx_MULT (Pmode, @@ -5891,7 +6158,19 @@ output_pic_addr_const (FILE *file, rtx x, int code) if (SYMBOL_REF_DECL (x)) mark_decl_referenced (SYMBOL_REF_DECL (x)); + /* APPLE LOCAL begin stubify optimized symbols */ +#if TARGET_MACHO + { + const char *name = XSTR (x, 0); + if (MACHOPIC_INDIRECT + && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) + name = machopic_indirection_name (x, /*stub_p=*/true); + assemble_name (file, name); + } +#else assemble_name (file, XSTR (x, 0)); +#endif + /* APPLE LOCAL end stubify optimized symbols */ if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) fputs ("@PLT", file); break; @@ -6703,6 +6982,17 @@ print_operand (FILE *file, rtx x, int code) else { + /* We have patterns that allow zero sets of memory, for instance. + In 64-bit mode, we should probably support all 8-byte vectors, + since we can in fact encode that into an immediate. */ + if (GET_CODE (x) == CONST_VECTOR) + { + if (x == CONST0_RTX (GET_MODE (x))) + x = const0_rtx; + else + abort (); + } + if (code != 'P') { if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) @@ -6721,7 +7011,13 @@ print_operand (FILE *file, rtx x, int code) } if (GET_CODE (x) == CONST_INT) fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - else if (flag_pic) + /* APPLE LOCAL begin dynamic-no-pic */ + else if (flag_pic +#if TARGET_MACHO + || MACHOPIC_INDIRECT +#endif + ) + /* APPLE LOCAL end dynamic-no-pic */ output_pic_addr_const (file, x, code); else output_addr_const (file, x); @@ -7280,8 +7576,9 @@ emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode) are the insn operands. The output may be [HSD]Imode and the input operand may be [SDX]Fmode. */ +/* APPLE LOCAL begin mainline 2005-03-16 4054919 */ const char * -output_fix_trunc (rtx insn, rtx *operands) +output_fix_trunc (rtx insn, rtx *operands, int fisttp) { int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; int dimode_p = GET_MODE (operands[0]) == DImode; @@ -7289,7 +7586,7 @@ output_fix_trunc (rtx insn, rtx *operands) /* Jump through a hoop or two for DImode, since the hardware has no non-popping instruction. We used to do this a different way, but that was somewhat fragile and broke with post-reload splitters. */ - if (dimode_p && !stack_top_dies) + if ((dimode_p || fisttp) && !stack_top_dies) output_asm_insn ("fld\t%y1", operands); if (!STACK_TOP_P (operands[1])) @@ -7298,15 +7595,21 @@ output_fix_trunc (rtx insn, rtx *operands) if (GET_CODE (operands[0]) != MEM) abort (); - output_asm_insn ("fldcw\t%3", operands); - if (stack_top_dies || dimode_p) - output_asm_insn ("fistp%z0\t%0", operands); + if (fisttp) + output_asm_insn ("fisttp%z0\t%0", operands); else - output_asm_insn ("fist%z0\t%0", operands); - output_asm_insn ("fldcw\t%2", operands); + { + output_asm_insn ("fldcw\t%3", operands); + if (stack_top_dies || dimode_p) + output_asm_insn ("fistp%z0\t%0", operands); + else + output_asm_insn ("fist%z0\t%0", operands); + output_asm_insn ("fldcw\t%2", operands); + } return ""; } +/* APPLE LOCAL end mainline 2005-03-16 4054919 */ /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi should be used. UNORDERED_P is true when fucom should be used. */ @@ -7515,7 +7818,8 @@ void ix86_expand_move (enum machine_mode mode, rtx operands[]) { int strict = (reload_in_progress || reload_completed); - rtx op0, op1; + /* APPLE LOCAL dynamic-no-pic */ + rtx insn, op0, op1; enum tls_model model; op0 = operands[0]; @@ -7549,29 +7853,49 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) } } - if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) + /* APPLE LOCAL begin dynamic-no-pic */ + if (MACHOPIC_INDIRECT + && mode == Pmode && symbolic_operand (op1, Pmode)) + /* APPLE LOCAL end dynamic-no-pic */ { #if TARGET_MACHO - if (MACHOPIC_PURE) + /* APPLE LOCAL begin dynamic-no-pic */ + if (MACHOPIC_INDIRECT) { rtx temp = ((reload_in_progress || ((op0 && GET_CODE (op0) == REG) && mode == Pmode)) ? op0 : gen_reg_rtx (Pmode)); op1 = machopic_indirect_data_reference (op1, temp); - op1 = machopic_legitimize_pic_address (op1, mode, - temp == op1 ? 0 : temp); + if (MACHOPIC_PURE) + op1 = machopic_legitimize_pic_address (op1, mode, + temp == op1 ? 0 : temp); } - else if (MACHOPIC_INDIRECT) - op1 = machopic_indirect_data_reference (op1, 0); - if (op0 == op1) - return; -#else + if (op0 != op1 && GET_CODE (op0) != MEM) + { + insn = gen_rtx_SET (VOIDmode, op0, op1); + emit_insn (insn); + return; + } + if (GET_CODE (op0) == MEM) + op1 = force_reg (Pmode, op1); + else + { + rtx temp = op0; + if (GET_CODE (temp) != REG) + temp = gen_reg_rtx (Pmode); + temp = legitimize_pic_address (op1, temp); + if (temp == op0) + return; + op1 = temp; + } +#else /* TARGET_MACHO */ if (GET_CODE (op0) == MEM) op1 = force_reg (Pmode, op1); else op1 = legitimize_address (op1, op1, Pmode); #endif /* TARGET_MACHO */ + /* APPLE LOCAL end dynamic-no-pic */ } else { @@ -10272,8 +10596,18 @@ ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) operand = copy_rtx (operand); PUT_MODE (operand, Pmode); parts[0] = parts[1] = parts[2] = operand; + return size; } - else if (!TARGET_64BIT) + + if (GET_CODE (operand) == CONST_VECTOR) + { + enum machine_mode imode = int_mode_for_mode (mode); + operand = simplify_subreg (imode, operand, mode, 0); + gcc_assert (operand != NULL); + mode = imode; + } + + if (!TARGET_64BIT) { if (mode == DImode) split_di (&operand, 1, &parts[0], &parts[1]); @@ -10430,6 +10764,10 @@ ix86_split_long_move (rtx operands[]) if (push && GET_CODE (operands[1]) == MEM && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) { + /* APPLE LOCAL begin 4099768 */ + if (nparts == 3 && TARGET_128BIT_LONG_DOUBLE && mode == XFmode) + part[1][2] = adjust_address (part[1][2], SImode, 4); + /* APPLE LOCAL end 4099768 */ if (nparts == 3) part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]), XEXP (part[1][2], 0)); @@ -11699,7 +12037,8 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, abort (); #if TARGET_MACHO - if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) + /* APPLE LOCAL dynamic-no-pic */ + if (MACHOPIC_INDIRECT && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) fnaddr = machopic_indirect_call_target (fnaddr); #else /* Static functions and indirect calls don't need the pic register. */ @@ -11828,6 +12167,11 @@ memory_address_length (rtx addr) if (! ix86_decompose_address (addr, &parts)) abort (); + if (parts.base && GET_CODE (parts.base) == SUBREG) + parts.base = SUBREG_REG (parts.base); + if (parts.index && GET_CODE (parts.index) == SUBREG) + parts.index = SUBREG_REG (parts.index); + base = parts.base; index = parts.index; disp = parts.disp; @@ -12239,6 +12583,20 @@ ix86_constant_alignment (tree exp, int align) && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) return BITS_PER_WORD; +/* APPLE LOCAL begin 4090661 */ +#if TARGET_MACHO + /* Without this, static arrays initialized to strings get aligned + to 32 bytes. These go in cstring, so would result in a lot of extra + padding in files with a couple of small strings. 4090661. */ + else if (TREE_CODE (exp) == STRING_CST) + { + if (TREE_STRING_LENGTH (exp) >= 31 && !optimize_size) + return BITS_PER_WORD; + else + return 8; + } +#endif +/* APPLE LOCAL end 4090661 */ return align; } @@ -12741,6 +13099,12 @@ enum ix86_builtins IX86_BUILTIN_MOVMSKPD, IX86_BUILTIN_PMOVMSKB128, + /* APPLE LOCAL begin 4099020 */ + IX86_BUILTIN_MOVQ, + IX86_BUILTIN_LOADQ, + IX86_BUILTIN_STOREQ, + /* APPLE LOCAL end 4099020 */ + IX86_BUILTIN_PACKSSWB128, IX86_BUILTIN_PACKSSDW128, IX86_BUILTIN_PACKUSWB128, @@ -13235,6 +13599,12 @@ ix86_init_builtins (void) { if (TARGET_MMX) ix86_init_mmx_sse_builtins (); + + /* APPLE LOCAL begin constant cfstrings */ +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif + /* APPLE LOCAL end constant cfstrings */ } /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX @@ -13806,6 +14176,15 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + /* APPLE LOCAL begin 4099020 */ + ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_movqv4si", ftype, IX86_BUILTIN_MOVQ); + ftype = build_function_type_list (V4SI_type_node, pv2si_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_loadlv4si", ftype, IX86_BUILTIN_LOADQ); + ftype = build_function_type_list (void_type_node, pv2si_type_node, V4SI_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_storelv4si", ftype, IX86_BUILTIN_STOREQ); + /* APPLE LOCAL end 4099020 */ + /* Access to the vec_init patterns. */ ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); @@ -14348,6 +14727,17 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_RCPSS: return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target); + /* APPLE LOCAL begin 4099020 */ + case IX86_BUILTIN_LOADQ: + return ix86_expand_unop_builtin (CODE_FOR_sse_loadqv4si, arglist, target, 1); + + case IX86_BUILTIN_MOVQ: + return ix86_expand_unop_builtin (CODE_FOR_sse_movqv4si, arglist, target, 0); + + case IX86_BUILTIN_STOREQ: + return ix86_expand_store_builtin (CODE_FOR_sse_storeqv4si, arglist); + /* APPLE LOCAL end 4099020 */ + case IX86_BUILTIN_LOADUPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); @@ -15028,15 +15418,30 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) return (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode)); } - /* We handle both integer and floats in the general purpose registers. - In future we should be able to handle vector modes as well. */ - if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode)) - return 0; - /* Take care for QImode values - they can be in non-QI regs, but then - they do cause partial register stalls. */ - if (regno < 4 || mode != QImode || TARGET_64BIT) + + if (mode == QImode) + { + /* Take care for QImode values - they can be in non-QI regs, + but then they do cause partial register stalls. */ + if (regno < 4 || TARGET_64BIT) + return 1; + if (!TARGET_PARTIAL_REG_STALL) + return 1; + return reload_in_progress || reload_completed; + } + /* We handle both integer and floats in the general purpose registers. */ + else if (VALID_INT_MODE_P (mode)) + return 1; + else if (VALID_FP_MODE_P (mode)) + return 1; + /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go + on to use that value in smaller contexts, this can easily force a + pseudo to be allocated to GENERAL_REGS. Since this is no worse than + supporting DImode, allow it. */ + else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) return 1; - return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL; + + return 0; } /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a @@ -15089,12 +15494,14 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) /* If MODE2 is only appropriate for an SSE register, then tie with any other mode acceptable to SSE registers. */ - if (SSE_REG_MODE_P (mode2)) + if (GET_MODE_SIZE (mode2) >= 8 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); /* If MODE2 is appropriate for an MMX (or SSE) register, then tie with any other mode acceptable to MMX registers. */ - if (MMX_REG_MODE_P (mode2)) + if (GET_MODE_SIZE (mode2) == 8 + && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1); return false; @@ -15190,6 +15597,21 @@ ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in) } } +/* APPLE LOCAL begin why is this local? */ +#if TARGET_MACHO +/* Cross-module name binding. Darwin does not support overriding + functions at dynamic-link time. */ + +static bool +ix86_binds_local_p (tree decl) +{ + /* APPLE LOCAL kext treat vtables as overridable */ + return default_binds_local_p_1 (decl, + flag_apple_kext && lang_hooks.vtable_p (decl)); +} +#endif +/* APPLE LOCAL end why is this local? */ + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -15519,40 +15941,99 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) sprintf (lazy_ptr_name, "L%d$lz", label); + /* APPLE LOCAL begin deep branch prediction pic-base */ + /* Choose one of three possible sections for this stub. */ if (MACHOPIC_PURE) - machopic_picsymbol_stub_section (); + { + if (TARGET_DEEP_BRANCH_PREDICTION) + machopic_picsymbol_stub2_section (); /* 25 byte PIC stub. */ + else + machopic_picsymbol_stub_section (); /* 26 byte PIC stub. */ + } else - machopic_symbol_stub_section (); + machopic_symbol_stub_section (); /* 16-byte -mdynamic-no-pic stub. */ + /* APPLE LOCAL end deep branch prediction pic-base */ fprintf (file, "%s:\n", stub); fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + /* APPLE LOCAL begin use %ecx in stubs 4146993 */ + /* APPLE LOCAL begin deep branch prediction pic-base */ if (MACHOPIC_PURE) { - fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label); - fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); - fprintf (file, "\tjmp %%edx\n"); + /* PIC stub. */ + if (TARGET_DEEP_BRANCH_PREDICTION) + { + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp); /* "CALL ___<cpu>.get_pc_thunk.cx". */ + fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label); + } + else + { + /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */ + fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label); + fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label); + } + fprintf (file, "\tjmp\t%%ecx\n"); } - else - fprintf (file, "\tjmp *%s\n", lazy_ptr_name); + else /* 16-byte -mdynamic-no-pic stub. */ + fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); + /* The "stub_binding_helper" is a fragment that gets executed only + once, the first time this stub is invoked (then it becomes "dead + code"). It asks the dynamic linker to set the + lazy_symbol_pointer to point at the function we want + (e.g. printf) so that subsequent invocations of this stub go + directly to that dynamically-linked callee. Other UN*X systems + use similar stubs, but those are generated by the static linker + and never appear in assembly files. */ + /* APPLE LOCAL end deep branch prediction pic-base */ fprintf (file, "%s:\n", binder_name); + /* APPLE LOCAL begin deep branch prediction pic-base * tabify insns */ if (MACHOPIC_PURE) { - fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); - fprintf (file, "\tpushl %%eax\n"); + fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); + fprintf (file, "\tpushl\t%%ecx\n"); } else - fprintf (file, "\t pushl $%s\n", lazy_ptr_name); + fprintf (file, "\t pushl\t$%s\n", lazy_ptr_name); - fprintf (file, "\tjmp dyld_stub_binding_helper\n"); + fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); + /* APPLE LOCAL end deep branch prediction pic-base * tabify insns */ + /* APPLE LOCAL end use %ecx in stubs 4146993 */ + + /* APPLE LOCAL begin deep branch prediction pic-base. */ + /* N.B. Keep the correspondence of these + 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the + old-pic/new-pic/non-pic stubs; altering this will break + compatibility with existing dylibs. */ + if (MACHOPIC_PURE) + { + /* PIC stubs. */ + if (TARGET_DEEP_BRANCH_PREDICTION) + machopic_lazy_symbol_ptr2_section (); /* 25-byte PIC stub using "CALL get_pc_thunk". */ + else + machopic_lazy_symbol_ptr_section (); /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */ + } + else + machopic_lazy_symbol_ptr3_section (); /* 16-byte -mdynamic-no-pic stub. */ - machopic_lazy_symbol_ptr_section (); fprintf (file, "%s:\n", lazy_ptr_name); fprintf (file, "\t.indirect_symbol %s\n", symbol_name); - fprintf (file, "\t.long %s\n", binder_name); + fprintf (file, "\t.long\t%s\n", binder_name); + /* APPLE LOCAL end deep branch prediction pic-base. */ +} + +/* APPLE LOCAL begin deep branch prediction pic-base */ +void +darwin_x86_file_end (void) +{ + darwin_file_end (); + ix86_file_end (); } +/* APPLE LOCAL end deep branch prediction pic-base */ #endif /* TARGET_MACHO */ /* Order the registers for register allocator. */ @@ -15873,6 +16354,14 @@ x86_field_alignment (tree field, int computed) if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) return computed; + /* APPLE LOCAL begin mac68k alignment */ + if (TARGET_ALIGN_MAC68K) + { + if (computed >= 128) + return computed; + return MIN (computed, 16); + } + /* APPLE LOCAL end mac68k alignment */ mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE ? get_inner_array_type (type) : type); if (mode == DFmode || mode == DCmode @@ -16198,11 +16687,21 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, case V4HImode: if (!mmx_ok) return false; - val = gen_lowpart (SImode, val); - x = gen_rtx_TRUNCATE (HImode, val); - x = gen_rtx_VEC_DUPLICATE (mode, x); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); - return true; + if (TARGET_SSE || TARGET_3DNOW_A) + { + val = gen_lowpart (SImode, val); + x = gen_rtx_TRUNCATE (HImode, val); + x = gen_rtx_VEC_DUPLICATE (mode, x); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return true; + } + else + { + smode = HImode; + wsmode = SImode; + wvmode = V2SImode; + goto widen; + } case V8QImode: if (!mmx_ok) |