diff options
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 3132 |
1 files changed, 2729 insertions, 403 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 267a16131d1..ccb9004b7fc 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -53,6 +53,8 @@ #include "cfglayout.h" #include "sched-int.h" #include "tree-gimple.h" +/* APPLE LOCAL mainline 2005-04-14 */ +#include "intl.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif @@ -60,6 +62,17 @@ #include "gstab.h" /* for N_SLINE */ #endif +/* APPLE LOCAL begin pascal strings */ +#include "../../libcpp/internal.h" +extern struct cpp_reader* parse_in; +/* APPLE LOCAL end pascal strings */ + +/* APPLE LOCAL begin Macintosh alignment */ +#ifndef TARGET_ALIGN_MAC68K +#define TARGET_ALIGN_MAC68K 0 +#endif +/* APPLE LOCAL end Macintosh alignment */ + #ifndef TARGET_NO_PROTOTYPE #define TARGET_NO_PROTOTYPE 0 #endif @@ -257,6 +270,10 @@ static GTY(()) tree pixel_V8HI_type_node; /* __vector __pixel */ int rs6000_warn_altivec_long = 1; /* On by default. */ const char *rs6000_warn_altivec_long_switch; +/* APPLE LOCAL begin AltiVec */ +int rs6000_altivec_pim = 0; +const char *rs6000_altivec_pim_switch; +/* APPLE LOCAL end AltiVec */ const char *rs6000_traceback_name; static enum { @@ -586,6 +603,12 @@ struct processor_costs ppc8540_cost = { COSTS_N_INSNS (29), /* ddiv */ }; +/* APPLE LOCAL begin AltiVec */ +/* NB: We do not store the PIM operations/predicates in the + VECTOR_BUILTIN_FNS array. */ +static GTY(()) tree vector_builtin_fns[ALTIVEC_PIM__FIRST]; +/* APPLE LOCAL end AltiVec */ + /* Instruction costs on POWER4 and POWER5 processors. 
*/ static const struct processor_costs power4_cost = { @@ -628,6 +651,8 @@ static bool macho_lo_sum_memory_operand (rtx x, enum machine_mode mode); static bool legitimate_lo_sum_address_p (enum machine_mode, rtx, int); static struct machine_function * rs6000_init_machine_status (void); static bool rs6000_assemble_integer (rtx, unsigned int, int); +/* APPLE LOCAL mainline */ +static bool no_global_regs_above (int); #ifdef HAVE_GAS_HIDDEN static void rs6000_assemble_visibility (tree, int); #endif @@ -671,6 +696,8 @@ static void rs6000_xcoff_file_end (void); #endif #if TARGET_MACHO static bool rs6000_binds_local_p (tree); +/* APPLE LOCAL pragma reverse_bitfield */ +static bool rs6000_reverse_bitfields_p (tree); #endif static int rs6000_variable_issue (FILE *, int, rtx, int); static bool rs6000_rtx_costs (rtx, int, int, int *); @@ -724,6 +751,16 @@ static rtx altivec_expand_predicate_builtin (enum insn_code, const char *, tree, rtx); static rtx altivec_expand_lv_builtin (enum insn_code, tree, rtx); static rtx altivec_expand_stv_builtin (enum insn_code, tree); +/* APPLE LOCAL begin AltiVec */ +static tree altivec_cov_rt_12 (tree, tree); +static tree altivec_cov_rt_2p (tree); +static tree altivec_cov_rt_1d (tree); +static tree altivec_cov_rt_1h (tree); +static struct altivec_pim_info *altivec_ovl_resolve (struct altivec_pim_info *, + tree, tree); +static tree altivec_convert_args (tree, tree); +/* APPLE LOCAL end AltiVec */ + static void rs6000_parse_abi_options (void); static void rs6000_parse_alignment_option (void); static void rs6000_parse_tls_size_option (void); @@ -748,17 +785,32 @@ static int rs6000_get_some_local_dynamic_name_1 (rtx *, void *); static rtx rs6000_complex_function_value (enum machine_mode); static rtx rs6000_spe_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree); -static rtx rs6000_darwin64_function_arg (CUMULATIVE_ARGS *, - enum machine_mode, tree, int); +static void rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *, + /* APPLE 
LOCAL fix 64-bit varargs 4028089 */ + HOST_WIDE_INT, int); +static void rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *, + tree, HOST_WIDE_INT); +static void rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *, + HOST_WIDE_INT, + rtx[], int *); +static void rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *, + tree, HOST_WIDE_INT, + rtx[], int *); +static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, tree, int, bool); static rtx rs6000_mixed_function_arg (enum machine_mode, tree, int); static void rs6000_move_block_from_reg (int regno, rtx x, int nregs); static void setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); +/* APPLE LOCAL begin Altivec */ +static bool skip_vec_args (tree, int, int*); +/* APPLE LOCAL end Altivec */ static bool rs6000_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); static int rs6000_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, tree, bool); +/* APPLE LOCAL mainline 2005-04-14 */ +static const char *invalid_arg_for_unprototyped_fn (tree, tree, tree); #if TARGET_MACHO static void macho_branch_islands (void); static void add_compiler_branch_island (tree, tree, int); @@ -936,6 +988,17 @@ static const char alt_reg_names[][8] = #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins + +/* APPLE LOCAL begin AltiVec */ +/* If we are running in Apple AltiVec (as opposed to FSF AltiVec) mode, + we will need to handle the Motorola PIM instructions ourselves instead + of relying on <altivec.h>. The rs6000_fold_builtin() routine will + rewrite the PIM instructions into the __builtin... (AldyVec) + instructions. 
*/ +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN rs6000_fold_builtin +/* APPLE LOCAL end AltiVec */ + #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin @@ -948,6 +1011,10 @@ static const char alt_reg_names[][8] = #if TARGET_MACHO #undef TARGET_BINDS_LOCAL_P #define TARGET_BINDS_LOCAL_P rs6000_binds_local_p +/* APPLE LOCAL begin pragma reverse_bitfields */ +#undef TARGET_REVERSE_BITFIELDS_P +#define TARGET_REVERSE_BITFIELDS_P rs6000_reverse_bitfields_p +/* APPLE LOCAL end pragma reverse_bitfields */ #endif #undef TARGET_ASM_OUTPUT_MI_THUNK @@ -983,6 +1050,11 @@ static const char alt_reg_names[][8] = #undef TARGET_SETUP_INCOMING_VARARGS #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs +/* APPLE LOCAL begin Altivec */ +#undef TARGET_SKIP_VEC_ARGS +#define TARGET_SKIP_VEC_ARGS skip_vec_args +/* APPLE LOCAL end Altivec */ + /* Always strict argument naming on rs6000. */ #undef TARGET_STRICT_ARGUMENT_NAMING #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true @@ -1008,6 +1080,11 @@ static const char alt_reg_names[][8] = #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p +/* APPLE LOCAL begin mainline 2005-04-14 */ + +#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN +#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn +/* APPLE LOCAL end mainline 2005-04-14 */ /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors The PowerPC architecture requires only weak consistency among @@ -1090,6 +1167,8 @@ rs6000_override_options (const char *default_cpu) size_t i, j; struct rs6000_cpu_select *ptr; int set_masks; + /* APPLE LOCAL -fast */ + enum processor_type mcpu_cpu = PROCESSOR_POWER4; /* Simplifications for entries below. 
*/ @@ -1173,6 +1252,13 @@ rs6000_override_options (const char *default_cpu) const size_t ptt_size = ARRAY_SIZE (processor_target_table); + /* APPLE LOCAL begin -mmultiple/-mstring fixme */ + /* Save current -mmultiple/-mno-multiple status. */ + int multiple = TARGET_MULTIPLE; + /* Save current -mstring/-mno-string status. */ + int string = TARGET_STRING; + /* APPLE LOCAL end -mmultiple/-mstring fixme */ + /* Some OSs don't support saving the high part of 64-bit registers on context switch. Other OSs don't support saving Altivec registers. On those OSs, we don't touch the MASK_POWERPC64 or MASK_ALTIVEC @@ -1198,14 +1284,25 @@ rs6000_override_options (const char *default_cpu) set_masks &= ~MASK_ALTIVEC; #endif - /* Don't override these by the processor default if given explicitly. */ - set_masks &= ~(target_flags_explicit - & (MASK_MULTIPLE | MASK_STRING | MASK_SOFT_FLOAT)); + /* Don't override by the processor default if given explicitly. */ + set_masks &= ~target_flags_explicit; /* Identify the processor type. */ rs6000_select[0].string = default_cpu; rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT; + /* APPLE LOCAL begin -fast */ + if (flag_fast || flag_fastf || flag_fastcp) + { + if (rs6000_select[1].string == (char *)0 && rs6000_select[2].string == (char *)0) + { + /* -mcpu and -mtune unspecified. 
Assume both are G5 */ + set_target_switch ("tune=G5"); + set_target_switch ("cpu=G5"); + } + } + /* APPLE LOCAL end -fast */ + for (i = 0; i < ARRAY_SIZE (rs6000_select); i++) { ptr = &rs6000_select[i]; @@ -1222,6 +1319,9 @@ rs6000_override_options (const char *default_cpu) target_flags &= ~set_masks; target_flags |= (processor_target_table[j].target_enable & set_masks); + /* APPLE LOCAL begin -fast */ + mcpu_cpu = processor_target_table[j].processor; + /* APPLE LOCAL end -fast */ } break; } @@ -1231,13 +1331,115 @@ rs6000_override_options (const char *default_cpu) } } + /* APPLE LOCAL begin AltiVec */ + /* If '-maltivec' has been specified or if anything else turns on + AltiVec, enable AltiVec optimizations, even if previously turned + off via '-faltivec'. */ + if (TARGET_ALTIVEC) + flag_disable_opts_for_faltivec = 0; + + /* Handle -m(no-)pim-altivec. */ + if (rs6000_altivec_pim_switch) + { + const char *base = rs6000_altivec_pim_switch - strlen ("pim-altivec");; + while (base[-1] != 'm') base--; + if (*rs6000_altivec_pim_switch != '\0') + error ("invalid option `%s'", base); + + rs6000_altivec_pim = (base[0] != 'n'); + /* If '-faltivec' or '-mpim-altivec' has been specified and we + have not already selected AltiVec codegen, disable certain + unsafe AltiVec optimizations so that the resulting binary can + run on a G3. These may be re-enabled by subsequently + specifying '-maltivec' or '-mcpu=xxx', where xxx supports + AltiVec instructions. */ + if (rs6000_altivec_pim) + { + if (! 
TARGET_ALTIVEC) + { + flag_disable_opts_for_faltivec = 1; + /* APPLE LOCAL radar 4161346 */ + target_flags |= (MASK_ALTIVEC | MASK_PIM_ALTIVEC); + } + } + else + target_flags &= ~MASK_ALTIVEC; + } + /* APPLE LOCAL end AltiVec */ + + /* APPLE LOCAL begin -fast */ + if (flag_fast || flag_fastf || flag_fastcp) + { + flag_gcse_sm = 1; + rs6000_sched_insert_nops = sched_finish_regroup_exact; + flag_unroll_loops = 1; + flag_tree_loop_linear = 1; + flag_strict_aliasing = 1; + flag_schedule_interblock = 1; + flag_gcse_las = 1; + align_jumps_max_skip = 15; + align_loops_max_skip = 15; + align_functions = 16; + align_loops = 16; + align_jumps = 16; + set_fast_math_flags (1); + flag_reorder_blocks = 1; + if (flag_branch_probabilities && !flag_exceptions) + flag_reorder_blocks_and_partition = 1; + if (!flag_pic) + set_target_switch ("dynamic-no-pic"); + + if (mcpu_cpu == PROCESSOR_POWER4) + { + set_target_switch ("powerpc-gpopt"); + set_target_switch ("powerpc64"); + } + if (flag_fast || flag_fastcp) + /* This doesn't work with NAG Fortran output. The gcc 3.5 C++ libraries + have been adjusted so that it now works with them. */ + set_target_switch ("align-natural"); + if (flag_fastf) + /* This applies Fortran argument semantics; for NAG Fortran output only. */ + flag_argument_noalias = 2; + /* IMI flags */ + disable_typechecking_for_spec_flag = 1; + flag_unit_at_a_time = 1; + } + /* APPLE LOCAL end -fast */ + + /* APPLE LOCAL rs6000_init_hard_regno_mode_ok must come AFTER setting of -fast flags */ + rs6000_init_hard_regno_mode_ok (); + if (TARGET_E500) rs6000_isel = 1; + /* APPLE LOCAL begin Disable string insns with -Os on Darwin (radar 3509006) */ /* If we are optimizing big endian systems for space, use the load/store - multiple and string instructions. */ + multiple instructions. 
*/ if (BYTES_BIG_ENDIAN && optimize_size) - target_flags |= ~target_flags_explicit & (MASK_MULTIPLE | MASK_STRING); + target_flags |= ~target_flags_explicit & MASK_MULTIPLE; + + /* If we are optimizing big endian systems for space, use the + string instructions. But do not do this for Darwin, as the + kernel can't properly support some hardware that doesn't have + these instructions. It's not clear that the compiler is the + right place to fix this, but that's how it is for now. See + *extensive* discussion in Radar 3509006. */ + if (BYTES_BIG_ENDIAN && optimize_size && DEFAULT_ABI != ABI_DARWIN) + target_flags |= MASK_STRING; + /* APPLE LOCAL end Disable string insns with -Os on Darwin (radar 3509006) */ + + /* APPLE LOCAL begin -mmultiple/-mstring fixme */ + /* If -mmultiple or -mno-multiple was explicitly used, don't + override with the processor default */ + if ((target_flags_explicit & MASK_MULTIPLE) != 0) + target_flags = (target_flags & ~MASK_MULTIPLE) | multiple; + + /* If -mstring or -mno-string was explicitly used, don't override + with the processor default. */ + if ((target_flags_explicit & MASK_STRING) != 0) + target_flags = (target_flags & ~MASK_STRING) | string; + /* APPLE LOCAL end -mmultiple/-mstring fixme */ /* Don't allow -mmultiple or -mstring on little endian systems unless the cpu is a 750, because the hardware doesn't support the @@ -1315,7 +1517,11 @@ rs6000_override_options (const char *default_cpu) /* Setting to empty string is same as "-mone-byte-bool". */ #if TARGET_MACHO darwin_one_byte_bool = ""; + /* APPLE LOCAL pragma reverse_bitfields */ + darwin_reverse_bitfields = 0; #endif + /* Default to natural alignment, for better performance. */ + rs6000_alignment_flags = MASK_ALIGN_NATURAL; } /* Handle -mabi= options. 
*/ @@ -1417,9 +1623,11 @@ rs6000_override_options (const char *default_cpu) rs6000_sched_restricted_insns_priority = atoi (rs6000_sched_restricted_insns_priority_str); + /* APPLE LOCAL begin only consider true dependency for grouping */ /* Handle -msched-costly-dep option. */ rs6000_sched_costly_dep - = (rs6000_sched_groups ? store_to_load_dep_costly : no_dep_costly); + = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly); + /* APPLE LOCAL end only consider true dependency for grouping */ if (rs6000_sched_costly_dep_str) { if (! strcmp (rs6000_sched_costly_dep_str, "no")) @@ -1511,6 +1719,12 @@ rs6000_override_options (const char *default_cpu) if (DEFAULT_ABI != ABI_AIX) targetm.calls.split_complex_arg = NULL; + /* APPLE LOCAL begin AltiVec */ + /* Enable '(vector signed int)(a, b, c, d)' vector literal notation. */ + if (TARGET_ALTIVEC) + targetm.cast_expr_as_vector_init = true; + /* APPLE LOCAL end AltiVec */ + /* Initialize rs6000_cost with the appropriate target costs. */ if (optimize_size) rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; @@ -1683,8 +1897,27 @@ rs6000_parse_alignment_option (void) { if (rs6000_alignment_string == 0) return; + /* APPLE LOCAL begin Macintosh alignment 2002-2-26 --ff */ + else if (! strcmp (rs6000_alignment_string, "mac68k")) + { + /* The old mac68k alignment has zero value for 64-bit work, + forbid its use. */ + if (DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT) + error ("-malign-mac68k is not allowed for 64-bit Darwin"); + rs6000_alignment_flags = MASK_ALIGN_MAC68K; + } + /* APPLE LOCAL end Macintosh alignment 2002-2-26 --ff */ else if (! strcmp (rs6000_alignment_string, "power")) - rs6000_alignment_flags = MASK_ALIGN_POWER; + { + /* On 64-bit Darwin, power alignment is ABI-incompatible with + some C library functions, so warn about it. The flag may be + useful for performance studies from time to time though, so + don't disable it entirely. 
*/ + if (DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT) + warning ("-malign-power is not supported for 64-bit Darwin;" + " it is incompatible with the installed C and C++ libraries"); + rs6000_alignment_flags = MASK_ALIGN_POWER; + } else if (! strcmp (rs6000_alignment_string, "natural")) rs6000_alignment_flags = MASK_ALIGN_NATURAL; else @@ -1712,7 +1945,61 @@ rs6000_parse_tls_size_option (void) void optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED) { -} + /* APPLE LOCAL begin tweak default optimizations */ + if (DEFAULT_ABI == ABI_DARWIN) + { + /* Turn these on only if specifically requested, not with -O* */ + /* Strict aliasing breaks too much existing code */ + flag_strict_aliasing = 0; + /* Block reordering causes code bloat, and very little speedup */ + flag_reorder_blocks = 0; + /* Multi-basic-block scheduling loses badly when the compiler + misguesses which blocks are going to be executed, more than + it gains when it guesses correctly. Its guesses for cases + where interblock scheduling occurs (if-then-else's) are + little better than random, so disable this unless requested. */ + flag_schedule_interblock = 0; + /* The Darwin libraries never set errno, so we might as well + avoid calling them when that's the only reason we would. */ + flag_errno_math = 0; + /* Trapping math is not needed by many users, and is expensive. + C99 permits us to default it off and we do that. It is + turned on when <fenv.h> is included (see darwin_pragma_fenv + in darwin-c.c). */ + flag_trapping_math = 0; + } + /* APPLE LOCAL end tweak default optimizations */ +} + +/* APPLE LOCAL begin optimization pragmas 3124235/3420242 */ +/* Version of the above for use from #pragma optimization_level. + Do not reset things unless they're per-function. 
*/ + +void +reset_optimization_options (int level ATTRIBUTE_UNUSED, + int size ATTRIBUTE_UNUSED) +{ + if (DEFAULT_ABI == ABI_DARWIN) + { + /* Block reordering causes code bloat, and very little speedup */ + flag_reorder_blocks = 0; + /* Multi-basic-block scheduling loses badly when the compiler + misguesses which blocks are going to be executed, more than + it gains when it guesses correctly. Its guesses for cases + where interblock scheduling occurs (if-then-else's) are + little better than random, so disable this unless requested. */ + flag_schedule_interblock = 0; + /* The Darwin libraries never set errno, so we might as well + avoid calling them when that's the only reason we would. */ + flag_errno_math = 0; + /* Trapping math is not needed by many users, and is expensive. + C99 permits us to default it off and we do that. It is + turned on when <fenv.h> is included (see darwin_pragma_fenv + in darwin-c.c). */ + flag_trapping_math = 0; + } +} +/* APPLE LOCAL end optimization pragmas 3124235/3420242 */ /* Do anything needed at the start of the asm file. */ @@ -1774,6 +2061,38 @@ rs6000_file_start (void) toc_section (); text_section (); } + + /* APPLE LOCAL begin lno */ +#if TARGET_MACHO + if (DEFAULT_ABI == ABI_DARWIN) + { + /* Emit declarations for all code sections at the beginning of the file; this + keeps them from being separated by data sections, which can lead to + out-of-range branches. Also align the unlikely text section properly; the + first real occurrence of this may be a label within a function, which does + not otherwise get aligned. 
*/ + if (flag_pic || MACHO_DYNAMIC_NO_PIC_P || flag_reorder_blocks_and_partition) + { + fprintf (asm_out_file, "\t.section __TEXT,__text,regular,pure_instructions\n"); + if (flag_reorder_blocks_and_partition) + { + fprintf (asm_out_file, "\t.section __TEXT,__unlikely,regular,pure_instructions\n"); + fprintf (asm_out_file, "\t.align 2\n"); + } + if (MACHO_DYNAMIC_NO_PIC_P ) + { + fprintf (asm_out_file, "\t.section __TEXT,__symbol_stub1,"); + fprintf (asm_out_file, "symbol_stubs,pure_instructions,16\n"); + } + else + { + fprintf (asm_out_file, "\t.section __TEXT,__picsymbolstub1,"); + fprintf (asm_out_file, "symbol_stubs,pure_instructions,32\n"); + } + } + } +#endif + /* APPLE LOCAL end lno */ } @@ -1857,6 +2176,24 @@ xer_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) return 0; } +/* APPLE LOCAL begin 4119059 */ +/* Return 1 if OP is a signed 5-bit constant. */ +int +s5bit_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= -16 && INTVAL (op) <= 15)); +} + +/* Return 1 if OP is a unsigned 5-bit constant. */ +int +u5bit_cint_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) <= 31)); +} + +/* APPLE LOCAL end 4119059 */ /* Return 1 if OP is a signed 8-bit constant. Int multiplication by such constants completes more quickly. */ @@ -2001,6 +2338,21 @@ reg_or_cint_operand (rtx op, enum machine_mode mode) return (GET_CODE (op) == CONST_INT || gpc_reg_operand (op, mode)); } +/* APPLE LOCAL begin radar 3869444 (also in mainline) */ +/* Return 1 if op is an integer meeting one of 'I','J','O','L'(TARGET_32BIT) + or 'J'(TARGET_64BIT) constraints or if it is a non-special register. 
*/ + +int +scc_operand (rtx op, enum machine_mode mode) +{ + return ((GET_CODE (op) == CONST_INT + && (CONST_OK_FOR_LETTER_P (INTVAL (op), 'I') + || CONST_OK_FOR_LETTER_P (INTVAL (op), 'K') + || CONST_OK_FOR_LETTER_P (INTVAL (op), 'O') + || CONST_OK_FOR_LETTER_P (INTVAL (op), (TARGET_32BIT ? 'L' : 'J')))) + || gpc_reg_operand (op, mode)); +} +/* APPLE LOCAL end radar 3869444 (also in mainline) */ /* Return 1 is the operand is either a non-special register or ANY 32-bit signed constant integer. */ @@ -2966,6 +3318,11 @@ call_operand (rtx op, enum machine_mode mode) return 0; return (GET_CODE (op) == SYMBOL_REF + /* APPLE LOCAL begin accept hard R12 as target reg */ +#ifdef MAGIC_INDIRECT_CALL_REG + || (GET_CODE (op) == REG && REGNO (op) == MAGIC_INDIRECT_CALL_REG) +#endif + /* APPLE LOCAL end accept hard R12 as target reg */ || (GET_CODE (op) == REG && (REGNO (op) == LINK_REGISTER_REGNUM || REGNO (op) == COUNT_REGISTER_REGNUM @@ -3652,21 +4009,16 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model) rs6000_emit_move (got, gsym, Pmode); else { - char buf[30]; - static int tls_got_labelno = 0; - rtx tempLR, lab, tmp3, mem; + rtx tempLR, tmp3, mem; rtx first, last; - ASM_GENERATE_INTERNAL_LABEL (buf, "LTLS", tls_got_labelno++); - lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); tempLR = gen_reg_rtx (Pmode); tmp1 = gen_reg_rtx (Pmode); tmp2 = gen_reg_rtx (Pmode); tmp3 = gen_reg_rtx (Pmode); mem = gen_const_mem (Pmode, tmp1); - first = emit_insn (gen_load_toc_v4_PIC_1b (tempLR, lab, - gsym)); + first = emit_insn (gen_load_toc_v4_PIC_1b (tempLR, gsym)); emit_move_insn (tmp1, tempLR); emit_move_insn (tmp2, mem); emit_insn (gen_addsi3 (tmp3, tmp1, tmp2)); @@ -3870,6 +4222,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode, && REG_MODE_OK_FOR_BASE_P (XEXP (x, 0), mode) && GET_CODE (XEXP (x, 1)) == CONST_INT && (INTVAL (XEXP (x, 1)) & 3) != 0 + && !ALTIVEC_VECTOR_MODE (mode) && GET_MODE_SIZE (mode) >= UNITS_PER_WORD && TARGET_POWERPC64) { @@ 
-4134,8 +4487,9 @@ rs6000_conditional_register_usage (void) if (DEFAULT_ABI == ABI_DARWIN && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) - global_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] - = fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] +/* APPLE LOCAL begin mainline remove global_regs[PIC] */ + fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] +/* APPLE LOCAL end mainline remove global_regs[PIC] */ = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; @@ -4716,12 +5070,23 @@ rs6000_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED) { /* In the darwin64 abi, try to use registers for larger structs if possible. */ - if (AGGREGATE_TYPE_P (type) - && rs6000_darwin64_abi + if (rs6000_darwin64_abi && TREE_CODE (type) == RECORD_TYPE - && ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 32) - && ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 0)) - return false; + && int_size_in_bytes (type) > 0) + { + CUMULATIVE_ARGS valcum; + rtx valret; + + valcum.words = 0; + valcum.fregno = FP_ARG_MIN_REG; + valcum.vregno = ALTIVEC_ARG_MIN_REG; + /* Do a trial code generation as if this were going to be passed + as an argument; if any part goes in memory, we return NULL. */ + valret = rs6000_darwin64_record_arg (&valcum, type, 1, true); + if (valret) + return false; + /* Otherwise fall through to more conventional ABI rules. 
*/ + } if (AGGREGATE_TYPE_P (type) && (TARGET_AIX_STRUCT_RET @@ -4912,6 +5277,9 @@ function_arg_boundary (enum machine_mode mode, tree type) || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) >= 16)) return 128; + else if (rs6000_darwin64_abi && mode == BLKmode + && type && TYPE_ALIGN (type) > 64) + return 128; else return PARM_BOUNDARY; } @@ -4934,46 +5302,140 @@ rs6000_arg_size (enum machine_mode mode, tree type) return (size + 7) >> 3; } -/* The darwin64 ABI calls for us to recurse down through structs, - applying the same rules to struct elements as if a reference to - each were being passed directly. */ +/* Use this to flush pending int fields. */ static void -darwin64_function_arg_advance (CUMULATIVE_ARGS *cum, tree type, - int named, int depth) +rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum, + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + HOST_WIDE_INT bitpos, int final) { - tree f, ftype; - int i, tot; + unsigned int startbit, endbit; + int intregs, intoffset; + enum machine_mode mode; - switch (TREE_CODE (type)) + /* APPLE LOCAL begin fix 64-bit varargs 4028089 */ + /* Handle the situations where a float is taking up the first half + of the GPR, and the other half is empty (typically due to + alignment restrictions). We can detect this by a 8-byte-aligned + int field, or by seeing that this is the final flush for this + argument. Count the word and continue on. 
*/ + if (cum->floats_in_gpr == 1 + && (cum->intoffset % 64 == 0 + || (cum->intoffset == -1 && final))) { - case RECORD_TYPE: - for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) - if (TREE_CODE (f) == FIELD_DECL) - { - ftype = TREE_TYPE (f); - function_arg_advance (cum, TYPE_MODE (ftype), ftype, - named, depth + 1); - } - break; + cum->words++; + cum->floats_in_gpr = 0; + } + /* APPLE LOCAL end fix 64-bit varargs 4028089 */ - case ARRAY_TYPE: - tot = int_size_in_bytes (type); - if (tot <= 0) - return; - ftype = TREE_TYPE (type); - tot /= int_size_in_bytes (ftype); - - for (i = 0; i < tot; ++i) + if (cum->intoffset == -1) + return; + + intoffset = cum->intoffset; + cum->intoffset = -1; + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + cum->floats_in_gpr = 0; + + if (intoffset % BITS_PER_WORD != 0) + { + mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, + MODE_INT, 0); + if (mode == BLKmode) { - function_arg_advance (cum, TYPE_MODE (ftype), ftype, - named, depth + 1); + /* We couldn't find an appropriate mode, which happens, + e.g., in packed structs when there are 3 bytes to load. + Back intoffset back to the beginning of the word in this + case. */ + intoffset = intoffset & -BITS_PER_WORD; } - break; - - default: - abort (); } + + startbit = intoffset & -BITS_PER_WORD; + endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; + intregs = (endbit - startbit) / BITS_PER_WORD; + cum->words += intregs; +} + +/* The darwin64 ABI calls for us to recurse down through structs, + looking for elements passed in registers. Unfortunately, we have + to track int register count here also because of misalignments + in powerpc alignment mode. 
*/ + +static void +rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum, + tree type, + HOST_WIDE_INT startbitpos) +{ + tree f; + + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + tree ftype = TREE_TYPE (f); + enum machine_mode mode = TYPE_MODE (ftype); + + if (DECL_SIZE (f) != 0 + && host_integerp (bit_position (f), 1)) + bitpos += int_bit_position (f); + + /* ??? FIXME: else assume zero offset. */ + + if (TREE_CODE (ftype) == RECORD_TYPE) + rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos); + else if (USE_FP_FOR_ARG_P (cum, mode, ftype)) + { + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); + cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3; + /* APPLE LOCAL begin fix 64-bit varargs 4028089 */ + /* Single-precision floats present a special problem for + us, because they are smaller than an 8-byte GPR, and so + the structure-packing rules combined with the standard + varargs behavior mean that we want to pack float/float + and float/int combinations into a single register's + space. This is complicated by the arg advance flushing, + which works on arbitrarily large groups of int-type + fields. */ + if (mode == SFmode) + { + if (cum->floats_in_gpr == 1) + { + /* Two floats in a word; count the word and reset + the float count. */ + cum->words++; + cum->floats_in_gpr = 0; + } + else if (bitpos % 64 == 0) + { + /* A float at the beginning of an 8-byte word; + count it and put off adjusting cum->words until + we see if a arg advance flush is going to do it + for us. */ + cum->floats_in_gpr++; + } + else + { + /* The float is at the end of a word, preceded + by integer fields, so the arg advance flush + just above has already set cum->words and + everything is taken care of. 
*/ + } + } + else + /* APPLE LOCAL end fix 64-bit varargs 4028089 */ + cum->words += (GET_MODE_SIZE (mode) + 7) >> 3; + } + else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1)) + { + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0); + cum->vregno++; + cum->words += 2; + } + else if (cum->intoffset == -1) + cum->intoffset = bitpos; + } } /* Update the data in CUM to advance over an argument @@ -4988,6 +5450,8 @@ void function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type, int named, int depth) { + int size; + /* Only tick off an argument if we're not recursing. */ if (depth == 0) cum->nargs_prototype--; @@ -5051,10 +5515,33 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, else if (rs6000_darwin64_abi && mode == BLKmode - && (TREE_CODE (type) == RECORD_TYPE - || TREE_CODE (type) == ARRAY_TYPE)) - darwin64_function_arg_advance (cum, type, named, depth); - + && TREE_CODE (type) == RECORD_TYPE + && (size = int_size_in_bytes (type)) > 0) + { + /* Variable sized types have size == -1 and are + treated as if consisting entirely of ints. + Pad to 16 byte boundary if needed. */ + if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD + && (cum->words % 2) != 0) + cum->words++; + /* For varargs, we can just go up by the size of the struct. */ + if (!named) + cum->words += (size + 7) / 8; + else + { + /* It is tempting to say int register count just goes up by + sizeof(type)/8, but this is wrong in a case such as + { int; double; int; } [powerpc alignment]. We have to + grovel through the fields for these too. 
*/ + cum->intoffset = 0; + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + cum->floats_in_gpr = 0; + rs6000_darwin64_record_arg_advance_recurse (cum, type, 0); + rs6000_darwin64_record_arg_advance_flush (cum, + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + size * BITS_PER_UNIT, 1); + } + } else if (DEFAULT_ABI == ABI_V4) { if (TARGET_HARD_FLOAT && TARGET_FPRS @@ -5213,136 +5700,186 @@ rs6000_spe_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, } } -/* For the darwin64 ABI, we want to construct a PARALLEL consisting of - the register(s) to be used for each field and subfield of a struct - being passed by value, along with the offset of where the - register's value may be found in the block. */ +/* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the + structure between cum->intoffset and bitpos to integer registers. */ -static rtx -rs6000_darwin64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, - tree type, int named) +static void +rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum, + HOST_WIDE_INT bitpos, rtx rvec[], int *k) { - tree f, ftype, offset; - rtx rvec[FIRST_PSEUDO_REGISTER], sub, suboff, roffset; - int k = 0, i, j, bytepos, subbytepos, tot; - CUMULATIVE_ARGS saved_cum = *cum; - enum machine_mode submode; + enum machine_mode mode; + unsigned int regno; + unsigned int startbit, endbit; + int this_regno, intregs, intoffset; + rtx reg; + + if (cum->intoffset == -1) + return; + + intoffset = cum->intoffset; + cum->intoffset = -1; - switch (TREE_CODE (type)) + /* If this is the trailing part of a word, try to only load that + much into the register. Otherwise load the whole register. Note + that in the latter case we may pick up unwanted bits. It's not a + problem at the moment but may wish to revisit. 
*/ + + if (intoffset % BITS_PER_WORD != 0) { - case RECORD_TYPE: - for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) - if (TREE_CODE (f) == FIELD_DECL) + mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, + MODE_INT, 0); + if (mode == BLKmode) + { + /* We couldn't find an appropriate mode, which happens, + e.g., in packed structs when there are 3 bytes to load. + Back intoffset back to the beginning of the word in this + case. */ + intoffset = intoffset & -BITS_PER_WORD; + mode = word_mode; + } + } + else + mode = word_mode; + + startbit = intoffset & -BITS_PER_WORD; + endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; + intregs = (endbit - startbit) / BITS_PER_WORD; + this_regno = cum->words + intoffset / BITS_PER_WORD; + + if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno) + cum->use_stack = 1; + + intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno); + if (intregs <= 0) + return; + + intoffset /= BITS_PER_UNIT; + do + { + regno = GP_ARG_MIN_REG + this_regno; + reg = gen_rtx_REG (mode, regno); + rvec[(*k)++] = + gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); + + this_regno += 1; + intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; + mode = word_mode; + intregs -= 1; + } + while (intregs > 0); +} + +/* Recursive workhorse for the following. */ + +static void +rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, tree type, + HOST_WIDE_INT startbitpos, rtx rvec[], + int *k) +{ + tree f; + + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + { + HOST_WIDE_INT bitpos = startbitpos; + tree ftype = TREE_TYPE (f); + enum machine_mode mode = TYPE_MODE (ftype); + + if (DECL_SIZE (f) != 0 + && host_integerp (bit_position (f), 1)) + bitpos += int_bit_position (f); + + /* ??? FIXME: else assume zero offset. 
*/ + + if (TREE_CODE (ftype) == RECORD_TYPE) + rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k); + else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype)) { - ftype = TREE_TYPE (f); - offset = DECL_FIELD_OFFSET (f); - bytepos = int_bit_position (f) / BITS_PER_UNIT; - /* Force substructs to be handled as BLKmode even if - they're small enough to be recorded as DImode, so we - drill through to non-record fields. */ - submode = TYPE_MODE (ftype); - if (TREE_CODE (ftype) == RECORD_TYPE) - submode = BLKmode; - sub = function_arg (cum, submode, ftype, named); - if (sub == NULL_RTX) - return NULL_RTX; - if (GET_CODE (sub) == PARALLEL) - { - for (i = 0; i < XVECLEN (sub, 0); i++) - { - rtx subsub = XVECEXP (sub, 0, i); - suboff = XEXP (subsub, 1); - subbytepos = INTVAL (suboff); - subbytepos += bytepos; - roffset = gen_rtx_CONST_INT (SImode, subbytepos); - subsub = XEXP (subsub, 0); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, subsub, roffset); - } - } - else +#if 0 + switch (mode) { - roffset = gen_rtx_CONST_INT (SImode, bytepos); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, sub, roffset); + case SCmode: mode = SFmode; break; + case DCmode: mode = DFmode; break; + case TCmode: mode = TFmode; break; + default: break; } - /* Now do an arg advance to get all the cumulative arg - stuff set correctly for the next subfield. Note that it - has no lasting effect, because it is being done on a - temporary copy of the cumulative arg data. 
*/ - function_arg_advance (cum, submode, ftype, named, 1); +#endif + rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); + rvec[(*k)++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, cum->fregno++), + GEN_INT (bitpos / BITS_PER_UNIT)); + if (mode == TFmode) + cum->fregno++; } - break; + else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1)) + { + rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); + rvec[(*k)++] + = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, cum->vregno++), + GEN_INT (bitpos / BITS_PER_UNIT)); + } + else if (cum->intoffset == -1) + cum->intoffset = bitpos; + } +} - case UNION_TYPE: - tot = rs6000_arg_size (mode, type); - if (tot <= 0) - return NULL_RTX; - bytepos = 0; +/* For the darwin64 ABI, we want to construct a PARALLEL consisting of + the register(s) to be used for each field and subfield of a struct + being passed by value, along with the offset of where the + register's value may be found in the block. FP fields go in FP + register, vector fields go in vector registers, and everything + else goes in int registers, packed as in memory. - for (j = 0; j < tot; ++j) - { - sub = gen_rtx_REG ((TARGET_64BIT ? DImode : SImode), GP_ARG_MIN_REG + cum->words++); - roffset = gen_rtx_CONST_INT (SImode, bytepos); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, sub, roffset); - if (cum->words >= GP_ARG_NUM_REG) - break; - bytepos += (TARGET_64BIT ? 8 : 4); - } - break; + This code is also used for function return values. RETVAL indicates + whether this is the case. - case ARRAY_TYPE: - tot = int_size_in_bytes (type); - if (tot <= 0) - return NULL_RTX; - ftype = TREE_TYPE (type); - tot /= int_size_in_bytes (ftype); - bytepos = 0; + Much of this is taken from the Sparc V9 port, which has a similar + calling convention. */ - for (j = 0; j < tot; ++j) - { - /* Force substructs to be handled as BLKmode even if - they're small enough to be recorded as DImode, so we - drill through to non-record fields. 
*/ - submode = TYPE_MODE (ftype); - if (TREE_CODE (ftype) == RECORD_TYPE) - submode = BLKmode; - sub = function_arg (cum, submode, ftype, named); - if (sub == NULL_RTX) - return NULL_RTX; - if (GET_CODE (sub) == PARALLEL) - { - for (i = 0; i < XVECLEN (sub, 0); i++) - { - rtx subsub = XVECEXP (sub, 0, i); - - suboff = XEXP (subsub, 1); - subbytepos = INTVAL (suboff); - subbytepos += bytepos; - roffset = gen_rtx_CONST_INT (SImode, subbytepos); - subsub = XEXP (subsub, 0); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, subsub, roffset); - } - } - else - { - roffset = gen_rtx_CONST_INT (SImode, bytepos); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, sub, roffset); - } - /* Now do an arg advance to get all the cumulative arg - stuff set correctly for the next subfield. Note that it - has no lasting effect, because it is being done on a - temporary copy of the cumulative arg data. */ - function_arg_advance (cum, submode, ftype, named, 1); - bytepos += int_size_in_bytes (ftype); - } - break; - - default: - abort (); - } - - *cum = saved_cum; - if (k > 0) - return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); +static rtx +rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, tree type, + int named, bool retval) +{ + rtx rvec[FIRST_PSEUDO_REGISTER]; + int k = 1, kbase = 1; + HOST_WIDE_INT typesize = int_size_in_bytes (type); + /* This is a copy; modifications are not visible to our caller. */ + CUMULATIVE_ARGS copy_cum = *orig_cum; + CUMULATIVE_ARGS *cum = ©_cum; + + /* Pad to 16 byte boundary if needed. */ + if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD + && (cum->words % 2) != 0) + cum->words++; + + cum->intoffset = 0; + cum->use_stack = 0; + /* APPLE LOCAL fix 64-bit varargs 4028089 */ + cum->floats_in_gpr = 0; + cum->named = named; + + /* Put entries into rvec[] for individual FP and vector fields, and + for the chunks of memory that go in int regs. 
Note we start at + element 1; 0 is reserved for an indication of using memory, and + may or may not be filled in below. */ + rs6000_darwin64_record_arg_recurse (cum, type, 0, rvec, &k); + rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k); + + /* If any part of the struct went on the stack put all of it there. + This hack is because the generic code for + FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register + parts of the struct are not at the beginning. */ + if (cum->use_stack) + { + if (retval) + return NULL_RTX; /* doesn't go in registers at all */ + kbase = 0; + rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); + } + if (k > 1 || cum->use_stack) + return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase])); else return NULL_RTX; } @@ -5406,7 +5943,8 @@ rs6000_mixed_function_arg (enum machine_mode mode, tree type, int align_words) This is null for libcalls where that information may not be available. CUM is a variable of type CUMULATIVE_ARGS which gives info about - the preceding args and about the function being called. + the preceding args and about the function being called. It is + not modified in this routine. NAMED is nonzero if this argument is a named parameter (otherwise it is an extra parameter matching an ellipsis). @@ -5454,13 +5992,10 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, return GEN_INT (cum->call_cookie); } - if (mode == BLKmode - && rs6000_darwin64_abi - && (TREE_CODE (type) == RECORD_TYPE - || TREE_CODE (type) == UNION_TYPE - || TREE_CODE (type) == ARRAY_TYPE)) + if (rs6000_darwin64_abi && mode == BLKmode + && TREE_CODE (type) == RECORD_TYPE) { - rtx rslt = rs6000_darwin64_function_arg (cum, mode, type, named); + rtx rslt = rs6000_darwin64_record_arg (cum, type, named, false); if (rslt != NULL_RTX) return rslt; /* Else fall through to usual handling. 
*/ @@ -5701,6 +6236,12 @@ rs6000_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, && cum->nargs_prototype >= 0) return 0; + /* In this complicated case we just disable the partial_nregs code. */ + if (rs6000_darwin64_abi && mode == BLKmode + && TREE_CODE (type) == RECORD_TYPE + && int_size_in_bytes (type) > 0) + return 0; + align = function_arg_boundary (mode, type) / PARM_BOUNDARY - 1; parm_offset = TARGET_32BIT ? 2 : 0; align_words = cum->words + ((parm_offset - cum->words) & align); @@ -5716,16 +6257,16 @@ rs6000_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode, && align_words >= GP_ARG_NUM_REG)))) { if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3) > FP_ARG_MAX_REG + 1) - ret = FP_ARG_MAX_REG + 1 - cum->fregno; + /* APPLE LOCAL mainline 2005-04-21 */ + ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8; else if (cum->nargs_prototype >= 0) return 0; } if (align_words < GP_ARG_NUM_REG && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type)) - ret = GP_ARG_NUM_REG - align_words; - - ret *= (TARGET_32BIT ? 4 : 8); + /* APPLE LOCAL mainline 2005-04-21 */ + ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8); if (ret != 0 && TARGET_DEBUG_ARG) fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret); @@ -5926,6 +6467,32 @@ setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, } } +/* APPLE LOCAL begin Altivec */ +/* This routine determins if an extra pass over argument list is needed + for vector aruments. It returns true, if current argument need be + skipped. This depends on if we are in the first iteration (to skip + vectors), or 2nd iteration (to skip non-vectors). 
+*/ + +static +bool skip_vec_args(tree arg_type, int pass, int *last_pass) +{ + if (DEFAULT_ABI != ABI_DARWIN) + return false; + + if (TREE_CODE (arg_type) == VECTOR_TYPE) + { + *last_pass = 2; + if (pass == 1) + return true; + } + else if (pass == 2) + return true; + return false; +} +/* APPLE LOCAL end Altivec */ + + /* Create the va_list data type. */ static tree @@ -6217,12 +6784,117 @@ rs6000_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) /* Builtins. */ +/* APPLE LOCAL begin Altivec */ #define def_builtin(MASK, NAME, TYPE, CODE) \ do { \ if ((MASK) & target_flags) \ - lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + vector_builtin_fns[(CODE)] = lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ NULL, NULL_TREE); \ } while (0) +/* APPLE LOCAL end Altivec */ + +/* APPLE LOCAL begin AltiVec */ +/* The AltiVec PIM operations and predicates (used in Apple AltiVec mode) + are stored in ALTIVEC_PIM_TABLE below, each annotated with flags indicating + how its arguments should be matched and/or how its return type is to be + determined. */ + +enum pim_flags +{ + /* CR6 predicate modifiers. Not used for operations. For predicates, + one of the following four values shall be prepended to the argument + list as an INTEGER_CST. */ + + pim_cr6_eq = 0, /* __CR6_EQ */ + pim_cr6_ne = 1, /* __CR6_EQ_REV */ + pim_cr6_lt = 2, /* __CR6_LT */ + pim_cr6_ge = 3, /* __CR6_LT_REV */ + pim_cr6_MASK = pim_cr6_eq | pim_cr6_ne | pim_cr6_lt | pim_cr6_ge, + + /* Function overload argument matching. Operations and predicates with + multiple overload candidates will have multiple entries, listed + contiguously, in the ALTIVEC_PIM_TABLE below. When the + rs6000_fold_builtin() routine is called, it will first point at + the first entry. If any of the pim_ovl_... flags is set for this + entry, the argument(s) to rs6000_fold_builtin() will be type-checked + accordingly. 
If the check succeeds, the current entry will be + used to rewrite the PIM instruction into a __builtin instruction; + if the check fails, the next entry in ALTIVEC_PIM_TABLE is selected + and the pim_ovl_... type comparison is made again. */ + + pim_ovl_16 = 4, /* First argument must be a 16-element vector */ + pim_ovl_16u = 8, + pim_ovl_8 = 12, /* First argument must be an 8-element vector */ + pim_ovl_8u = 16, + pim_ovl_8p = 20, /* First argument must be a vector pixel */ + pim_ovl_4 = 24, /* First argument must be a 4-element vector */ + pim_ovl_4u = 28, + pim_ovl_4f = 32, /* First argument must be a vector float */ + pim_ovl_16u_16u = 36, /* First two args must be unsigned 16-el vectors */ + pim_ovl_8u_8u = 40, + pim_ovl_4u_4u = 44, + pim_ovl_pqi_2 = 48, /* Second argument must be a pointer to QI. */ + pim_ovl_phi_2 = 52, /* Second argument must be a pointer to HI. */ + pim_ovl_psi_2 = 56, /* Second argument must be a pointer to SI. */ + pim_ovl_MASK = pim_ovl_16 | pim_ovl_16u | pim_ovl_8 | pim_ovl_8u + | pim_ovl_8p | pim_ovl_4 | pim_ovl_4u | pim_ovl_4f + | pim_ovl_16u_16u | pim_ovl_8u_8u | pim_ovl_4u_4u + | pim_ovl_pqi_2 | pim_ovl_phi_2 | pim_ovl_psi_2, + + /* Return type computation. For some operations/predicates, the return + type is not always the same (in which case it will be stored + in the ALTIVEC_PIM_table), but rather is a function of the arguments + supplied. */ + + pim_rt_12 = 512, /* Covariant with first two arguments. */ + pim_rt_2p = 1024, /* Covariant with pointee of second argument. */ + pim_rt_1 = 1536, /* Covariant with first argument only. */ + pim_rt_1d = 2048, /* Double the vector element size of first arg. */ + pim_rt_1h = 2560, /* Halve the vector element size of first arg. */ + pim_rt_MASK = pim_rt_12 | pim_rt_2p | pim_rt_1 | pim_rt_1d | pim_rt_1h, + + /* Argument manipulation. Before the __builtin instructions are called, + the arguments may need to be rearranged. 
In addition, for all + predicates, one of the CR6 values will be prepended to the argument + list (see pim_cr6_... above). */ + + pim_manip_swap = 8192, /* Swap the first two arguments. */ + pim_manip_dup = 16384, /* Duplicate first argument. */ + pim_manip_MASK = pim_manip_swap | pim_manip_dup, + + /* Mark the beginning of instruction groups. For our purposes, an + instruction group is the collection of overload candidates for + a particular instruction or predicate. For example, the entries + "vec_abss", "vec_abss.2" and "vec_abss.3" defined in + altivec_init_builtins() below constitute a group, as does the + singleton "vec_addc" entry. */ + + pim_group = 32768 +}; + +struct altivec_pim_info GTY(()) +{ + tree rettype; /* Return type (unless pim_rt_... flags are used). */ + int insn; /* FUNCTION_DECL_CODE of the underlying '__builtin_...'. */ + enum pim_flags flags; /* See 'enum pim_flags' above. */ +}; + +static GTY(()) struct altivec_pim_info +altivec_pim_table[ALTIVEC_PIM__LAST - ALTIVEC_PIM__FIRST + 1]; + +#define def_pim_builtin(NAME, TYPE, INSN, FLAGS) \ +do { \ + lang_hooks.builtin_function (NAME, int_ftype_ellipsis, pim_code, \ + BUILT_IN_MD, NULL, NULL_TREE); \ + \ + altivec_pim_table[pim_code - ALTIVEC_PIM__FIRST].rettype = TYPE; \ + altivec_pim_table[pim_code - ALTIVEC_PIM__FIRST].insn \ + = ALTIVEC_BUILTIN_##INSN; \ + altivec_pim_table[pim_code - ALTIVEC_PIM__FIRST].flags = FLAGS; \ + \ + ++pim_code; \ +} while (0) +/* APPLE LOCAL end AltiVec */ /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). 
*/ @@ -6355,12 +7027,14 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vspltb, "__builtin_altivec_vspltb", ALTIVEC_BUILTIN_VSPLTB }, { MASK_ALTIVEC, CODE_FOR_altivec_vsplth, "__builtin_altivec_vsplth", ALTIVEC_BUILTIN_VSPLTH }, { MASK_ALTIVEC, CODE_FOR_altivec_vspltw, "__builtin_altivec_vspltw", ALTIVEC_BUILTIN_VSPLTW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsrb, "__builtin_altivec_vsrb", ALTIVEC_BUILTIN_VSRB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsrh, "__builtin_altivec_vsrh", ALTIVEC_BUILTIN_VSRH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsrw, "__builtin_altivec_vsrw", ALTIVEC_BUILTIN_VSRW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsrab, "__builtin_altivec_vsrab", ALTIVEC_BUILTIN_VSRAB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsrah, "__builtin_altivec_vsrah", ALTIVEC_BUILTIN_VSRAH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsraw, "__builtin_altivec_vsraw", ALTIVEC_BUILTIN_VSRAW }, + /* APPLE LOCAL begin mainline 2005-04-05 3972515 */ + { MASK_ALTIVEC, CODE_FOR_lshrv16qi3, "__builtin_altivec_vsrb", ALTIVEC_BUILTIN_VSRB }, + { MASK_ALTIVEC, CODE_FOR_lshrv8hi3, "__builtin_altivec_vsrh", ALTIVEC_BUILTIN_VSRH }, + { MASK_ALTIVEC, CODE_FOR_lshrv4si3, "__builtin_altivec_vsrw", ALTIVEC_BUILTIN_VSRW }, + { MASK_ALTIVEC, CODE_FOR_ashrv16qi3, "__builtin_altivec_vsrab", ALTIVEC_BUILTIN_VSRAB }, + { MASK_ALTIVEC, CODE_FOR_ashrv8hi3, "__builtin_altivec_vsrah", ALTIVEC_BUILTIN_VSRAH }, + { MASK_ALTIVEC, CODE_FOR_ashrv4si3, "__builtin_altivec_vsraw", ALTIVEC_BUILTIN_VSRAW }, + /* APPLE LOCAL end mainline 2005-04-05 3972515 */ { MASK_ALTIVEC, CODE_FOR_altivec_vsr, "__builtin_altivec_vsr", ALTIVEC_BUILTIN_VSR }, { MASK_ALTIVEC, CODE_FOR_altivec_vsro, "__builtin_altivec_vsro", ALTIVEC_BUILTIN_VSRO }, { MASK_ALTIVEC, CODE_FOR_subv16qi3, "__builtin_altivec_vsububm", ALTIVEC_BUILTIN_VSUBUBM }, @@ -6661,6 +7335,382 @@ static struct builtin_description bdesc_1arg[] = { 0, CODE_FOR_spe_evsubfusiaaw, "__builtin_spe_evsubfusiaaw", SPE_BUILTIN_EVSUBFUSIAAW }, }; +/* 
APPLE LOCAL begin AltiVec */ +/* Determine the return type from types T1 and T2 of the first two arguments. + This is required for some of the AltiVec PIM operations/predicates. */ + +static tree +altivec_cov_rt_12 (tree t1, tree t2) +{ + /* NB: The ordering of the following statements is important. + Matching of more specific types (e.g., 'vector pixel') should + precede matching of more general types, esp. if they subsume the + former (e.g., 'vector of 8 elements'). */ + +#define RETURN_IF_EITHER_IS(TYPE) if (t1 == TYPE || t2 == TYPE) return TYPE + + RETURN_IF_EITHER_IS (unsigned_V16QI_type_node); + RETURN_IF_EITHER_IS (V16QI_type_node); + RETURN_IF_EITHER_IS (bool_V16QI_type_node); + RETURN_IF_EITHER_IS (unsigned_V8HI_type_node); + RETURN_IF_EITHER_IS (pixel_V8HI_type_node); + RETURN_IF_EITHER_IS (V8HI_type_node); + RETURN_IF_EITHER_IS (bool_V8HI_type_node); + RETURN_IF_EITHER_IS (unsigned_V4SI_type_node); + RETURN_IF_EITHER_IS (V4SF_type_node); + RETURN_IF_EITHER_IS (V4SI_type_node); + RETURN_IF_EITHER_IS (bool_V4SI_type_node); + +#undef RETURN_IF_EITHER_IS + + return NULL_TREE; +} + +/* Determine the return type from the pointee type of argument type T. + This is required for some of the AltiVec PIM operations/predicates. */ + +static tree +altivec_cov_rt_2p (tree t) +{ + /* Must be a pointer. */ + + if (TREE_CODE (t) != POINTER_TYPE) + return NULL_TREE; + + t = TYPE_MAIN_VARIANT (TREE_TYPE (t)); + + /* For pointers to vectors, the return type is the vector itself. */ + + if (TREE_CODE (t) == VECTOR_TYPE) + return t; + + switch (TYPE_MODE (t)) + { + case QImode: + return TYPE_UNSIGNED (t) ? unsigned_V16QI_type_node : V16QI_type_node; + + case HImode: + return TYPE_UNSIGNED (t) ? unsigned_V8HI_type_node : V8HI_type_node; + + case SImode: + return TYPE_UNSIGNED (t) ? 
unsigned_V4SI_type_node : V4SI_type_node; + + case SFmode: + return V4SF_type_node; + + default: + return NULL_TREE; + } +} + +/* Determine the return type from type T by doubling the size of its + constituent vector elements. This is required for some of the AltiVec + PIM operations/predicates. */ + +static tree +altivec_cov_rt_1d (tree t) +{ + if (t == V16QI_type_node) + return V8HI_type_node; + else if (t == unsigned_V16QI_type_node) + return unsigned_V8HI_type_node; + else if (t == bool_V16QI_type_node) + return bool_V8HI_type_node; + else if (t == V8HI_type_node) + return V4SI_type_node; + else if (t == unsigned_V8HI_type_node || t == pixel_V8HI_type_node) + return unsigned_V4SI_type_node; + else if (t == bool_V8HI_type_node) + return bool_V4SI_type_node; + else + return NULL_TREE; /* Invalid argument. */ +} + +/* Determine the return type from type T by halving the size of its + constituent vector elements. This is required for some of the AltiVec + PIM operations/predicates. */ + +static tree +altivec_cov_rt_1h (tree t) +{ + if (t == V8HI_type_node) + return V16QI_type_node; + else if (t == unsigned_V8HI_type_node || t == pixel_V8HI_type_node) + return unsigned_V16QI_type_node; + else if (t == bool_V8HI_type_node) + return bool_V16QI_type_node; + else if (t == V4SI_type_node) + return V8HI_type_node; + else if (t == unsigned_V4SI_type_node) + return unsigned_V8HI_type_node; + else if (t == bool_V4SI_type_node) + return bool_V8HI_type_node; + else + return NULL_TREE; /* Invalid argument. */ +} + +/* Given the types T1 and T2 of the first two arguments, and INFO pointing + to the first of available overload candidates (in the ALTIVEC_PIM_TABLE) + for an AltiVec PIM operation or predicate, select a desired overload + candidate by incrementing and returning INFO as appropriate. If no + overload candidate is suitable, return NULL. 
*/ + +static struct altivec_pim_info * +altivec_ovl_resolve (struct altivec_pim_info *info, tree t1, tree t2) +{ + /* Make sure we have all the types that we need. */ + if (!t1 || (!t2 && (info->flags & pim_ovl_MASK) >= pim_ovl_16u_16u)) + return 0; + + /* Examine overload candidates in order, and return the first one + that matches. For this scheme to work, overload candidates must + be ordered from most to least type-specific. */ + do + { + switch (info->flags & pim_ovl_MASK) + { + +#define OVL_MATCH(EXPR) if (EXPR) return info; break + + case pim_ovl_16: + OVL_MATCH (TYPE_MODE (t1) == V16QImode); + + case pim_ovl_16u: + OVL_MATCH (TYPE_MODE (t1) == V16QImode && TYPE_UNSIGNED (t1)); + + case pim_ovl_8: + OVL_MATCH (TYPE_MODE (t1) == V8HImode); + + case pim_ovl_8u: + OVL_MATCH (TYPE_MODE (t1) == V8HImode && TYPE_UNSIGNED (t1)); + + case pim_ovl_8p: + OVL_MATCH (t1 == pixel_V8HI_type_node); + + case pim_ovl_4: + OVL_MATCH (TYPE_MODE (t1) == V4SImode || TYPE_MODE (t1) == V4SFmode); + + case pim_ovl_4u: + OVL_MATCH (TYPE_MODE (t1) == V4SImode && TYPE_UNSIGNED (t1)); + + case pim_ovl_4f: + OVL_MATCH (TYPE_MODE (t1) == V4SFmode); + + case pim_ovl_16u_16u: + OVL_MATCH (t1 == unsigned_V16QI_type_node + || t2 == unsigned_V16QI_type_node); + + case pim_ovl_8u_8u: + OVL_MATCH (t1 == unsigned_V8HI_type_node + || t1 == pixel_V8HI_type_node + || t2 == unsigned_V8HI_type_node + || t2 == pixel_V8HI_type_node); + + case pim_ovl_4u_4u: + OVL_MATCH (t1 == unsigned_V4SI_type_node + || t2 == unsigned_V4SI_type_node); + + case pim_ovl_pqi_2: + OVL_MATCH (TREE_CODE (t2) == POINTER_TYPE + && (TYPE_MODE (TREE_TYPE (t2)) == QImode + || TYPE_MODE (TREE_TYPE (t2)) == V16QImode)); + + case pim_ovl_phi_2: + OVL_MATCH (TREE_CODE (t2) == POINTER_TYPE + && (TYPE_MODE (TREE_TYPE (t2)) == HImode + || TYPE_MODE (TREE_TYPE (t2)) == V8HImode)); + + case pim_ovl_psi_2: + OVL_MATCH (TREE_CODE (t2) == POINTER_TYPE + && (TYPE_MODE (TREE_TYPE (t2)) == SImode + || TYPE_MODE (TREE_TYPE (t2)) == V4SImode + 
|| TYPE_MODE (TREE_TYPE (t2)) == SFmode + || TYPE_MODE (TREE_TYPE (t2)) == V4SFmode)); + + default: /* Catch-all. */ + return info; + +#undef OVL_MATCH + } + } + while (!((++info)->flags & pim_group)); /* Advance to next candidate. */ + + return NULL; /* No suitable overload candidate found. */ +} + +/* Convert each function argument in the ARGS list into a corresponding + type found in the TYPES list. This must be done before calling the + __builtin_... AltiVec instructions, whose declared argument types may differ + from what was passed to rs6000_fold_builtin(). */ + +static tree +altivec_convert_args (tree types, tree args) +{ + tree t, a; + + for (t = types, a = args; t && a; t = TREE_CHAIN (t), a = TREE_CHAIN (a)) + { + TREE_VALUE (a) = convert (TREE_VALUE (t), TREE_VALUE (a)); + + /* Suppress overflows, so that GIMPLE does not create temporary + variables on us. */ + if (TREE_CODE (TREE_VALUE (a)) == INTEGER_CST) + { + TREE_OVERFLOW (TREE_VALUE (a)) = 0; + TREE_CONSTANT_OVERFLOW (TREE_VALUE (a)) = 0; + } + } + + return args; +} + +/* The following function rewrites EXP by substituting AltiVec PIM operations + or predicates with built-in instructions defined above. Type casts are + provided if needed. */ + +tree +rs6000_fold_builtin (tree exp, bool ARG_UNUSED (ignore)) +{ + tree fndecl, arglist, rettype; + tree typ1 = NULL_TREE, typ2 = NULL_TREE; + int fcode, ovl_error = 0; + struct altivec_pim_info *info; + + /* Bail out if not in Apple AltiVec mode. */ + if (!rs6000_altivec_pim) + return NULL_TREE; + + fndecl = get_callee_fndecl (exp); + fcode = DECL_FUNCTION_CODE (fndecl); + + /* Bail out unless we are looking at one of the AltiVec PIM + operations/predicates. */ + + if (fcode < ALTIVEC_PIM__FIRST || fcode > ALTIVEC_PIM__LAST) + return NULL_TREE; + + /* Point at the first (and possibly only) entry in ALTIVEC_PIM_TABLE + describing this PIM operation/predicate, and how to convert it to + a __builtin_... call. 
*/ + + info = altivec_pim_table + (fcode - ALTIVEC_PIM__FIRST); + + /* Separate out the argument types for further analysis. */ + + arglist = TREE_OPERAND (exp, 1); + + if (arglist) + typ1 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_VALUE (arglist))); + + if (arglist && TREE_CHAIN (arglist)) + typ2 = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_VALUE (TREE_CHAIN (arglist)))); + + /* Select from a list of overloaded functions, if needed. */ + + if (info->flags & pim_ovl_MASK) + { + info = altivec_ovl_resolve (info, typ1, typ2); + + if (!info) + { + /* No suitable overload candidate was found! */ + ovl_error = 1; /* We use this to indicate error. */ + /* Point at the first overload candidate again. */ + info = altivec_pim_table + (fcode - ALTIVEC_PIM__FIRST); + } + } + + /* Determine the return type. */ + + switch (info->flags & pim_rt_MASK) + { + case pim_rt_12: + /* Return type is covariant with the first two arguments. */ + rettype = altivec_cov_rt_12 (typ1, typ2); + break; + + /* Return type is covariant with pointee of second argument. */ + case pim_rt_2p: + rettype = altivec_cov_rt_2p (typ2); + break; + + /* Return type is covariant with the first argument only. */ + case pim_rt_1: + rettype = typ1; + break; + + /* Return type is covariant with first argument, but with doubled + vector element sizes. */ + case pim_rt_1d: + rettype = altivec_cov_rt_1d (typ1); + break; + + /* Return type is covariant with first argument, but with halved + vector element sizes. */ + case pim_rt_1h: + rettype = altivec_cov_rt_1h (typ1); + break; + + default: + /* Retrieve return type to use from ALTIVEC_PIM_TABLE. */ + rettype = info->rettype; + } + + /* Rearrange arguments, as needed. 
*/ + + switch (info->flags & pim_manip_MASK) + { + case pim_manip_swap: + if (!typ1 || !typ2) + rettype = NULL_TREE; + else + { + tree swap = TREE_VALUE (arglist); + + TREE_VALUE (arglist) = TREE_VALUE (TREE_CHAIN (arglist)); + TREE_VALUE (TREE_CHAIN (arglist)) = swap; + } + + break; + + case pim_manip_dup: + if (!typ1 || typ2) + rettype = NULL_TREE; + else + TREE_CHAIN (arglist) = tree_cons (NULL_TREE, TREE_VALUE (arglist), + NULL_TREE); + + break; + } + + /* For predicates, prepend the proper CR6 value to the argument list. */ + + if (fcode >= ALTIVEC_PIM_VEC_ALL_EQ) + arglist = tree_cons (NULL_TREE, + build_int_cst (NULL_TREE, info->flags & pim_cr6_MASK), + arglist); + + /* If we could not properly determine an overload candidate or a return type, + issue an error. */ + + if (ovl_error || !rettype) + { + error ("invalid argument(s) for AltiVec operation or predicate"); + /* Choose the return type for the first overload candidate, if + a type has been provided. Otherwise, use 'vector signed int'. */ + rettype = info->rettype ? info->rettype : V4SI_type_node; + } + + /* Retrieve the underlying AltiVec __builtin_... to call, and call it. */ + + fndecl = vector_builtin_fns [info->insn]; + arglist = altivec_convert_args (TYPE_ARG_TYPES (TREE_TYPE (fndecl)), + arglist); + + return convert (rettype, build_function_call_expr (fndecl, arglist)); +} +/* APPLE LOCAL end AltiVec */ + static rtx rs6000_expand_unop_builtin (enum insn_code icode, tree arglist, rtx target) { @@ -6680,8 +7730,20 @@ rs6000_expand_unop_builtin (enum insn_code icode, tree arglist, rtx target) if (icode == CODE_FOR_altivec_vspltisb || icode == CODE_FOR_altivec_vspltish - || icode == CODE_FOR_altivec_vspltisw - || icode == CODE_FOR_spe_evsplatfi + /* APPLE LOCAL begin 4119059 */ + || icode == CODE_FOR_altivec_vspltisw) + { + /* Only allow 5-bit *signed* literals. 
*/ + if (GET_CODE (op0) != CONST_INT + || INTVAL (op0) > 15 + || INTVAL (op0) < -16) + { + error ("argument 1 must be a 5-bit signed literal"); + return const0_rtx; + } + } + if (icode == CODE_FOR_spe_evsplatfi + /* APPLE LOCAL end 4119059 */ || icode == CODE_FOR_spe_evsplati) { /* Only allow 5-bit *signed* literals. */ @@ -7924,6 +8986,12 @@ rs6000_init_builtins (void) altivec_init_builtins (); if (TARGET_ALTIVEC || TARGET_SPE) rs6000_common_init_builtins (); + + /* APPLE LOCAL begin constant cfstrings */ +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif + /* APPLE LOCAL end constant cfstrings */ } /* Search through a set of builtins and enable the mask bits. @@ -8392,6 +9460,716 @@ altivec_init_builtins (void) /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */ altivec_builtin_mask_for_load = decl; } + + /* APPLE LOCAL begin AltiVec */ + /* If Apple AltiVec is enabled, we need to define additional builtins + in lieu of what <altivec.h> provides for FSF AltiVec. */ + if (rs6000_altivec_pim) + { + tree int_ftype_ellipsis = build_function_type (integer_type_node, + NULL_TREE); + int pim_code = ALTIVEC_PIM__FIRST; + + /* NB: For overloaded operations/predicates, the pim_... flags specify + how to match up the argument types and how to determine the + return type, if necessary; the rs6000_fold_builtin() routine + does all this. */ + + /* PIM Operations. 
*/ + + gcc_assert (pim_code == ALTIVEC_PIM_VEC_ABS); + + def_pim_builtin ("vec_abs", V16QI_type_node, ABS_V16QI, pim_ovl_16 | pim_group); + def_pim_builtin ("vec_abs.2", V8HI_type_node, ABS_V8HI, pim_ovl_8); + def_pim_builtin ("vec_abs.3", V4SF_type_node, ABS_V4SF, pim_ovl_4f); + def_pim_builtin ("vec_abs.4", V4SI_type_node, ABS_V4SI, pim_ovl_4); + + def_pim_builtin ("vec_abss", V16QI_type_node, ABSS_V16QI, pim_ovl_16 | pim_group); + def_pim_builtin ("vec_abss.2", V8HI_type_node, ABSS_V8HI, pim_ovl_8); + def_pim_builtin ("vec_abss.3", V4SI_type_node, ABSS_V4SI, pim_ovl_4); + + def_pim_builtin ("vec_add", NULL_TREE, VADDUBM, pim_ovl_16 | pim_rt_12 | pim_group); + def_pim_builtin ("vec_add.2", NULL_TREE, VADDUHM, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_add.3", V4SF_type_node, VADDFP, pim_ovl_4f); + def_pim_builtin ("vec_add.4", NULL_TREE, VADDUWM, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_addc", unsigned_V4SI_type_node, VADDCUW, pim_group); + + def_pim_builtin ("vec_adds", NULL_TREE, VADDUBS, pim_ovl_16u_16u | pim_rt_12 | pim_group); + def_pim_builtin ("vec_adds.2", NULL_TREE, VADDSBS, pim_ovl_16 | pim_rt_12); + def_pim_builtin ("vec_adds.3", NULL_TREE, VADDUHS, pim_ovl_8u_8u | pim_rt_12); + def_pim_builtin ("vec_adds.4", NULL_TREE, VADDSHS, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_adds.5", NULL_TREE, VADDUWS, pim_ovl_4u_4u | pim_rt_12); + def_pim_builtin ("vec_adds.6", NULL_TREE, VADDSWS, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_and", NULL_TREE, VAND, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_andc", NULL_TREE, VANDC, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_avg", NULL_TREE, VAVGUB, pim_ovl_16u | pim_rt_12 | pim_group); + def_pim_builtin ("vec_avg.2", NULL_TREE, VAVGSB, pim_ovl_16 | pim_rt_12); + def_pim_builtin ("vec_avg.3", NULL_TREE, VAVGUH, pim_ovl_8u | pim_rt_12); + def_pim_builtin ("vec_avg.4", NULL_TREE, VAVGSH, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_avg.5", NULL_TREE, VAVGUW, pim_ovl_4u | pim_rt_12); + 
def_pim_builtin ("vec_avg.6", NULL_TREE, VAVGSW, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_ceil", V4SF_type_node, VRFIP, pim_group); + + def_pim_builtin ("vec_cmpb", V4SI_type_node, VCMPBFP, pim_group); + + def_pim_builtin ("vec_cmpeq", bool_V16QI_type_node, VCMPEQUB, pim_ovl_16 | pim_group); + def_pim_builtin ("vec_cmpeq.2", bool_V8HI_type_node, VCMPEQUH, pim_ovl_8); + def_pim_builtin ("vec_cmpeq.3", bool_V4SI_type_node, VCMPEQFP, pim_ovl_4f); + def_pim_builtin ("vec_cmpeq.4", bool_V4SI_type_node, VCMPEQUW, pim_ovl_4); + + def_pim_builtin ("vec_cmpge", bool_V4SI_type_node, VCMPGEFP, pim_group); + + def_pim_builtin ("vec_cmpgt", bool_V16QI_type_node, VCMPGTUB, pim_ovl_16u | pim_group); + def_pim_builtin ("vec_cmpgt.2", bool_V16QI_type_node, VCMPGTSB, pim_ovl_16); + def_pim_builtin ("vec_cmpgt.3", bool_V8HI_type_node, VCMPGTUH, pim_ovl_8u); + def_pim_builtin ("vec_cmpgt.4", bool_V8HI_type_node, VCMPGTSH, pim_ovl_8); + def_pim_builtin ("vec_cmpgt.5", bool_V4SI_type_node, VCMPGTFP, pim_ovl_4f); + def_pim_builtin ("vec_cmpgt.6", bool_V4SI_type_node, VCMPGTUW, pim_ovl_4u); + def_pim_builtin ("vec_cmpgt.7", bool_V4SI_type_node, VCMPGTSW, pim_ovl_4); + + def_pim_builtin ("vec_cmple", bool_V4SI_type_node, VCMPGEFP, pim_manip_swap | pim_group); + + def_pim_builtin ("vec_cmplt", bool_V16QI_type_node, VCMPGTUB, pim_ovl_16u | pim_manip_swap | pim_group); + def_pim_builtin ("vec_cmplt.2", bool_V16QI_type_node, VCMPGTSB, pim_ovl_16 | pim_manip_swap); + def_pim_builtin ("vec_cmplt.3", bool_V8HI_type_node, VCMPGTUH, pim_ovl_8u | pim_manip_swap); + def_pim_builtin ("vec_cmplt.4", bool_V8HI_type_node, VCMPGTSH, pim_ovl_8 | pim_manip_swap); + def_pim_builtin ("vec_cmplt.5", bool_V4SI_type_node, VCMPGTFP, pim_ovl_4f | pim_manip_swap); + def_pim_builtin ("vec_cmplt.6", bool_V4SI_type_node, VCMPGTUW, pim_ovl_4u | pim_manip_swap); + def_pim_builtin ("vec_cmplt.7", bool_V4SI_type_node, VCMPGTSW, pim_ovl_4 | pim_manip_swap); + + def_pim_builtin ("vec_ctf", V4SF_type_node, VCFUX, 
pim_ovl_4u | pim_group); + def_pim_builtin ("vec_ctf.2", V4SF_type_node, VCFSX, pim_ovl_4); + + def_pim_builtin ("vec_cts", V4SI_type_node, VCTSXS, pim_ovl_4f | pim_group); + + def_pim_builtin ("vec_ctu", unsigned_V4SI_type_node, VCTUXS, pim_ovl_4f | pim_group); + + def_pim_builtin ("vec_dss", void_type_node, DSS, pim_group); + + def_pim_builtin ("vec_dssall", void_type_node, DSSALL, pim_group); + + def_pim_builtin ("vec_dst", void_type_node, DST, pim_group); + + def_pim_builtin ("vec_dstst", void_type_node, DSTST, pim_group); + + def_pim_builtin ("vec_dststt", void_type_node, DSTSTT, pim_group); + + def_pim_builtin ("vec_dstt", void_type_node, DSTT, pim_group); + + def_pim_builtin ("vec_expte", V4SF_type_node, VEXPTEFP, pim_group); + + def_pim_builtin ("vec_floor", V4SF_type_node, VRFIM, pim_group); + + def_pim_builtin ("vec_ld", NULL_TREE, LVX, pim_rt_2p | pim_group); + + def_pim_builtin ("vec_lde", NULL_TREE, LVEBX, pim_ovl_pqi_2 | pim_rt_2p | pim_group); + def_pim_builtin ("vec_lde.2", NULL_TREE, LVEHX, pim_ovl_phi_2 | pim_rt_2p); + def_pim_builtin ("vec_lde.3", NULL_TREE, LVEWX, pim_ovl_psi_2 | pim_rt_2p); + + def_pim_builtin ("vec_ldl", NULL_TREE, LVXL, pim_rt_2p | pim_group); + + def_pim_builtin ("vec_loge", V4SF_type_node, VLOGEFP, pim_group); + + def_pim_builtin ("vec_lvebx", NULL_TREE, LVEBX, pim_rt_2p | pim_group); + def_pim_builtin ("vec_lvehx", NULL_TREE, LVEHX, pim_rt_2p | pim_group); + def_pim_builtin ("vec_lvewx", NULL_TREE, LVEWX, pim_rt_2p | pim_group); + + def_pim_builtin ("vec_lvsl", unsigned_V16QI_type_node, LVSL, pim_group); + + def_pim_builtin ("vec_lvsr", unsigned_V16QI_type_node, LVSR, pim_group); + + def_pim_builtin ("vec_lvx", NULL_TREE, LVX, pim_rt_2p | pim_group); + + def_pim_builtin ("vec_lvxl", NULL_TREE, LVXL, pim_rt_2p | pim_group); + + def_pim_builtin ("vec_madd", V4SF_type_node, VMADDFP, pim_group); + + def_pim_builtin ("vec_madds", V8HI_type_node, VMHADDSHS, pim_group); + + def_pim_builtin ("vec_max", NULL_TREE, VMAXUB, 
pim_ovl_16u_16u | pim_rt_12 | pim_group); + def_pim_builtin ("vec_max.2", NULL_TREE, VMAXSB, pim_ovl_16 | pim_rt_12); + def_pim_builtin ("vec_max.3", NULL_TREE, VMAXUH, pim_ovl_8u_8u | pim_rt_12); + def_pim_builtin ("vec_max.4", NULL_TREE, VMAXSH, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_max.5", NULL_TREE, VMAXFP, pim_ovl_4f | pim_rt_12); + def_pim_builtin ("vec_max.6", NULL_TREE, VMAXUW, pim_ovl_4u_4u | pim_rt_12); + def_pim_builtin ("vec_max.7", NULL_TREE, VMAXSW, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_mergeh", NULL_TREE, VMRGHB, pim_ovl_16 | pim_rt_12 | pim_group); + def_pim_builtin ("vec_mergeh.2", NULL_TREE, VMRGHH, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_mergeh.3", NULL_TREE, VMRGHW, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_mergel", NULL_TREE, VMRGLB, pim_ovl_16 | pim_rt_12 | pim_group); + def_pim_builtin ("vec_mergel.2", NULL_TREE, VMRGLH, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_mergel.3", NULL_TREE, VMRGLW, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_mfvscr", unsigned_V8HI_type_node, MFVSCR, pim_group); + + def_pim_builtin ("vec_min", NULL_TREE, VMINUB, pim_ovl_16u_16u | pim_rt_12 | pim_group); + def_pim_builtin ("vec_min.2", NULL_TREE, VMINSB, pim_ovl_16 | pim_rt_12); + def_pim_builtin ("vec_min.3", NULL_TREE, VMINUH, pim_ovl_8u_8u | pim_rt_12); + def_pim_builtin ("vec_min.4", NULL_TREE, VMINSH, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_min.5", NULL_TREE, VMINFP, pim_ovl_4f | pim_rt_12); + def_pim_builtin ("vec_min.6", NULL_TREE, VMINUW, pim_ovl_4u_4u | pim_rt_12); + def_pim_builtin ("vec_min.7", NULL_TREE, VMINSW, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_mladd", unsigned_V8HI_type_node, VMLADDUHM, pim_ovl_8u_8u | pim_group); + def_pim_builtin ("vec_mladd.2", V8HI_type_node, VMLADDUHM, pim_ovl_8); + + def_pim_builtin ("vec_mradds", V8HI_type_node, VMHRADDSHS, pim_group); + + def_pim_builtin ("vec_msum", unsigned_V4SI_type_node, VMSUMUBM, pim_ovl_16u | pim_group); + def_pim_builtin 
("vec_msum.2", V4SI_type_node, VMSUMMBM, pim_ovl_16); + def_pim_builtin ("vec_msum.3", unsigned_V4SI_type_node, VMSUMUHM, pim_ovl_8u); + def_pim_builtin ("vec_msum.4", V4SI_type_node, VMSUMSHM, pim_ovl_8); + + def_pim_builtin ("vec_msums", unsigned_V4SI_type_node, VMSUMUHS, pim_ovl_8u | pim_group); + def_pim_builtin ("vec_msums.2", V4SI_type_node, VMSUMSHS, pim_ovl_8); + + def_pim_builtin ("vec_mtvscr", void_type_node, MTVSCR, pim_group); + + def_pim_builtin ("vec_mule", unsigned_V8HI_type_node, VMULEUB, pim_ovl_16u | pim_group); + def_pim_builtin ("vec_mule.2", V8HI_type_node, VMULESB, pim_ovl_16); + def_pim_builtin ("vec_mule.3", unsigned_V4SI_type_node, VMULEUH, pim_ovl_8u); + def_pim_builtin ("vec_mule.4", V4SI_type_node, VMULESH, pim_ovl_8); + + def_pim_builtin ("vec_mulo", unsigned_V8HI_type_node, VMULOUB, pim_ovl_16u | pim_group); + def_pim_builtin ("vec_mulo.2", V8HI_type_node, VMULOSB, pim_ovl_16); + def_pim_builtin ("vec_mulo.3", unsigned_V4SI_type_node, VMULOUH, pim_ovl_8u); + def_pim_builtin ("vec_mulo.4", V4SI_type_node, VMULOSH, pim_ovl_8); + + def_pim_builtin ("vec_nmsub", V4SF_type_node, VNMSUBFP, pim_group); + + def_pim_builtin ("vec_nor", NULL_TREE, VNOR, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_or", NULL_TREE, VOR, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_pack", NULL_TREE, VPKUHUM, pim_ovl_8 | pim_rt_1h | pim_group); + def_pim_builtin ("vec_pack.2", NULL_TREE, VPKUWUM, pim_ovl_4 | pim_rt_1h); + + def_pim_builtin ("vec_packpx", pixel_V8HI_type_node, VPKPX, pim_group); + + def_pim_builtin ("vec_packs", unsigned_V16QI_type_node, VPKUHUS, pim_ovl_8u | pim_group); + def_pim_builtin ("vec_packs.2", V16QI_type_node, VPKSHSS, pim_ovl_8); + def_pim_builtin ("vec_packs.3", unsigned_V8HI_type_node, VPKUWUS, pim_ovl_4u); + def_pim_builtin ("vec_packs.4", V8HI_type_node, VPKSWSS, pim_ovl_4); + + def_pim_builtin ("vec_packsu", unsigned_V16QI_type_node, VPKUHUS, pim_ovl_8u | pim_group); + def_pim_builtin ("vec_packsu.2", 
unsigned_V16QI_type_node, VPKSHUS, pim_ovl_8); + def_pim_builtin ("vec_packsu.3", unsigned_V8HI_type_node, VPKUWUS, pim_ovl_4u); + def_pim_builtin ("vec_packsu.4", unsigned_V8HI_type_node, VPKSWUS, pim_ovl_4); + + def_pim_builtin ("vec_perm", V16QI_type_node, VPERM_4SI, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_re", V4SF_type_node, VREFP, pim_group); + + def_pim_builtin ("vec_rl", NULL_TREE, VRLB, pim_ovl_16 | pim_rt_1 | pim_group); + def_pim_builtin ("vec_rl.2", NULL_TREE, VRLH, pim_ovl_8 | pim_rt_1); + def_pim_builtin ("vec_rl.3", NULL_TREE, VRLW, pim_ovl_4 | pim_rt_1); + + def_pim_builtin ("vec_round", V4SF_type_node, VRFIN, pim_group); + + def_pim_builtin ("vec_rsqrte", V4SF_type_node, VRSQRTEFP, pim_group); + + def_pim_builtin ("vec_sel", NULL_TREE, VSEL_4SI, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_sl", NULL_TREE, VSLB, pim_ovl_16 | pim_rt_1 | pim_group); + def_pim_builtin ("vec_sl.2", NULL_TREE, VSLH, pim_ovl_8 | pim_rt_1); + def_pim_builtin ("vec_sl.3", NULL_TREE, VSLW, pim_ovl_4 | pim_rt_1); + + def_pim_builtin ("vec_sld", NULL_TREE, VSLDOI_4SI, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_sll", NULL_TREE, VSL, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_slo", NULL_TREE, VSLO, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_splat", NULL_TREE, VSPLTB, pim_ovl_16 | pim_rt_1 | pim_group); + def_pim_builtin ("vec_splat.2", NULL_TREE, VSPLTH, pim_ovl_8 | pim_rt_1); + def_pim_builtin ("vec_splat.3", NULL_TREE, VSPLTW, pim_ovl_4 | pim_rt_1); + + def_pim_builtin ("vec_splat_s8", V16QI_type_node, VSPLTISB, pim_group); + + def_pim_builtin ("vec_splat_s16", V8HI_type_node, VSPLTISH, pim_group); + + def_pim_builtin ("vec_splat_s32", V4SI_type_node, VSPLTISW, pim_group); + + def_pim_builtin ("vec_splat_u8", unsigned_V16QI_type_node, VSPLTISB, pim_group); + + def_pim_builtin ("vec_splat_u16", unsigned_V8HI_type_node, VSPLTISH, pim_group); + + def_pim_builtin ("vec_splat_u32", unsigned_V4SI_type_node, VSPLTISW, pim_group); + + def_pim_builtin 
("vec_sr", NULL_TREE, VSRB, pim_ovl_16 | pim_rt_1 | pim_group); + def_pim_builtin ("vec_sr.2", NULL_TREE, VSRH, pim_ovl_8 | pim_rt_1); + def_pim_builtin ("vec_sr.3", NULL_TREE, VSRW, pim_ovl_4 | pim_rt_1); + + def_pim_builtin ("vec_sra", NULL_TREE, VSRAB, pim_ovl_16 | pim_rt_1 | pim_group); + def_pim_builtin ("vec_sra.2", NULL_TREE, VSRAH, pim_ovl_8 | pim_rt_1); + def_pim_builtin ("vec_sra.3", NULL_TREE, VSRAW, pim_ovl_4 | pim_rt_1); + + def_pim_builtin ("vec_srl", NULL_TREE, VSR, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_sro", NULL_TREE, VSRO, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_st", void_type_node, STVX, pim_group); + + def_pim_builtin ("vec_ste", void_type_node, STVEBX, pim_ovl_16 | pim_group); + def_pim_builtin ("vec_ste.2", void_type_node, STVEHX, pim_ovl_8); + def_pim_builtin ("vec_ste.3", void_type_node, STVEWX, pim_ovl_4); + + def_pim_builtin ("vec_stl", void_type_node, STVXL, pim_group); + + def_pim_builtin ("vec_stvebx", void_type_node, STVEBX, pim_group); + def_pim_builtin ("vec_stvehx", void_type_node, STVEHX, pim_group); + def_pim_builtin ("vec_stvewx", void_type_node, STVEWX, pim_group); + + def_pim_builtin ("vec_stvx", void_type_node, STVX, pim_group); + + def_pim_builtin ("vec_stvxl", void_type_node, STVXL, pim_group); + + def_pim_builtin ("vec_sub", NULL_TREE, VSUBUBM, pim_ovl_16 | pim_rt_12 | pim_group); + def_pim_builtin ("vec_sub.2", NULL_TREE, VSUBUHM, pim_ovl_8 | pim_rt_12); + def_pim_builtin ("vec_sub.3", NULL_TREE, VSUBFP, pim_ovl_4f | pim_rt_12); + def_pim_builtin ("vec_sub.4", NULL_TREE, VSUBUWM, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_subc", unsigned_V4SI_type_node, VSUBCUW, pim_group); + + def_pim_builtin ("vec_subs", NULL_TREE, VSUBUBS, pim_ovl_16u_16u | pim_rt_12 | pim_group); + def_pim_builtin ("vec_subs.2", NULL_TREE, VSUBSBS, pim_ovl_16 | pim_rt_12); + def_pim_builtin ("vec_subs.3", NULL_TREE, VSUBUHS, pim_ovl_8u_8u | pim_rt_12); + def_pim_builtin ("vec_subs.4", NULL_TREE, VSUBSHS, pim_ovl_8 | 
pim_rt_12); + def_pim_builtin ("vec_subs.5", NULL_TREE, VSUBUWS, pim_ovl_4u_4u | pim_rt_12); + def_pim_builtin ("vec_subs.6", NULL_TREE, VSUBSWS, pim_ovl_4 | pim_rt_12); + + def_pim_builtin ("vec_sum4s", unsigned_V4SI_type_node, VSUM4UBS, pim_ovl_16u | pim_group); + def_pim_builtin ("vec_sum4s.2", V4SI_type_node, VSUM4SBS, pim_ovl_16); + def_pim_builtin ("vec_sum4s.3", V4SI_type_node, VSUM4SHS, pim_ovl_8); + + def_pim_builtin ("vec_sum2s", V4SI_type_node, VSUM2SWS, pim_group); + + def_pim_builtin ("vec_sums", V4SI_type_node, VSUMSWS, pim_group); + + def_pim_builtin ("vec_trunc", V4SF_type_node, VRFIZ, pim_group); + + def_pim_builtin ("vec_unpackh", NULL_TREE, VUPKHSB, pim_ovl_16 | pim_rt_1d | pim_group); + def_pim_builtin ("vec_unpackh.2", NULL_TREE, VUPKHPX, pim_ovl_8p | pim_rt_1d); + def_pim_builtin ("vec_unpackh.3", NULL_TREE, VUPKHSH, pim_ovl_8 | pim_rt_1d); + + def_pim_builtin ("vec_unpackl", NULL_TREE, VUPKLSB, pim_ovl_16 | pim_rt_1d | pim_group); + def_pim_builtin ("vec_unpackl.2", NULL_TREE, VUPKLPX, pim_ovl_8p | pim_rt_1d); + def_pim_builtin ("vec_unpackl.3", NULL_TREE, VUPKLSH, pim_ovl_8 | pim_rt_1d); + + gcc_assert (pim_code == ALTIVEC_PIM_VEC_VADDCUW); + + def_pim_builtin ("vec_vaddcuw", unsigned_V4SI_type_node, VADDCUW, pim_group); + + def_pim_builtin ("vec_vaddfp", V4SF_type_node, VADDFP, pim_group); + + def_pim_builtin ("vec_vaddsbs", NULL_TREE, VADDSBS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vaddshs", NULL_TREE, VADDSHS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vaddsws", NULL_TREE, VADDSWS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vaddubm", NULL_TREE, VADDUBM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vaddubs", NULL_TREE, VADDUBS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vadduhm", NULL_TREE, VADDUHM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vadduhs", NULL_TREE, VADDUHS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vadduwm", NULL_TREE, VADDUWM, pim_rt_12 | pim_group); + + def_pim_builtin 
("vec_vadduws", NULL_TREE, VADDUWS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vand", NULL_TREE, VAND, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vandc", NULL_TREE, VANDC, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavgsb", NULL_TREE, VAVGSB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavgsh", NULL_TREE, VAVGSH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavgsw", NULL_TREE, VAVGSW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavgub", NULL_TREE, VAVGUB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavguh", NULL_TREE, VAVGUH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vavguw", NULL_TREE, VAVGUW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vcfsx", V4SF_type_node, VCFSX, pim_group); + + def_pim_builtin ("vec_vcfux", V4SF_type_node, VCFUX, pim_group); + + def_pim_builtin ("vec_vcmpbfp", V4SI_type_node, VCMPBFP, pim_group); + + def_pim_builtin ("vec_vcmpeqfp", bool_V4SI_type_node, VCMPEQFP, pim_group); + + def_pim_builtin ("vec_vcmpequb", bool_V16QI_type_node, VCMPEQUB, pim_group); + + def_pim_builtin ("vec_vcmpequh", bool_V8HI_type_node, VCMPEQUH, pim_group); + + def_pim_builtin ("vec_vcmpequw", bool_V4SI_type_node, VCMPEQUW, pim_group); + + def_pim_builtin ("vec_vcmpgefp", bool_V4SI_type_node, VCMPGEFP, pim_group); + + def_pim_builtin ("vec_vcmpgtfp", bool_V4SI_type_node, VCMPGTFP, pim_group); + + def_pim_builtin ("vec_vcmpgtsb", bool_V16QI_type_node, VCMPGTSB, pim_group); + + def_pim_builtin ("vec_vcmpgtsh", bool_V8HI_type_node, VCMPGTSH, pim_group); + + def_pim_builtin ("vec_vcmpgtsw", bool_V4SI_type_node, VCMPGTSW, pim_group); + + def_pim_builtin ("vec_vcmpgtub", bool_V16QI_type_node, VCMPGTUB, pim_group); + + def_pim_builtin ("vec_vcmpgtuh", bool_V8HI_type_node, VCMPGTUH, pim_group); + + def_pim_builtin ("vec_vcmpgtuw", bool_V4SI_type_node, VCMPGTUW, pim_group); + + def_pim_builtin ("vec_vctsxs", V4SI_type_node, VCTSXS, pim_group); + + def_pim_builtin ("vec_vctuxs", unsigned_V4SI_type_node, VCTUXS, 
pim_group); + + def_pim_builtin ("vec_vexptefp", V4SF_type_node, VEXPTEFP, pim_group); + + def_pim_builtin ("vec_vlogefp", V4SF_type_node, VLOGEFP, pim_group); + + def_pim_builtin ("vec_vmaddfp", V4SF_type_node, VMADDFP, pim_group); + + def_pim_builtin ("vec_vmaxfp", NULL_TREE, VMAXFP, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxsb", NULL_TREE, VMAXSB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxsh", NULL_TREE, VMAXSH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxsw", NULL_TREE, VMAXSW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxub", NULL_TREE, VMAXUB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxuh", NULL_TREE, VMAXUH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmaxuw", NULL_TREE, VMAXUW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmhaddshs", V8HI_type_node, VMHADDSHS, pim_group); + + def_pim_builtin ("vec_vmhraddshs", V8HI_type_node, VMHRADDSHS, pim_group); + + def_pim_builtin ("vec_vminfp", NULL_TREE, VMINFP, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminsb", NULL_TREE, VMINSB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminsh", NULL_TREE, VMINSH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminsw", NULL_TREE, VMINSW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminub", NULL_TREE, VMINUB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminuh", NULL_TREE, VMINUH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vminuw", NULL_TREE, VMINUW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmladduhm", NULL_TREE, VMLADDUHM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrghb", NULL_TREE, VMRGHB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrghh", NULL_TREE, VMRGHH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrghw", NULL_TREE, VMRGHW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrglb", NULL_TREE, VMRGLB, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrglh", NULL_TREE, VMRGLH, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmrglw", 
NULL_TREE, VMRGLW, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vmsummbm", V4SI_type_node, VMSUMMBM, pim_group); + + def_pim_builtin ("vec_vmsumshm", V4SI_type_node, VMSUMSHM, pim_group); + + def_pim_builtin ("vec_vmsumshs", V4SI_type_node, VMSUMSHS, pim_group); + + def_pim_builtin ("vec_vmsumubm", unsigned_V4SI_type_node, VMSUMUBM, pim_group); + + def_pim_builtin ("vec_vmsumuhm", unsigned_V4SI_type_node, VMSUMUHM, pim_group); + + def_pim_builtin ("vec_vmsumuhs", unsigned_V4SI_type_node, VMSUMUHS, pim_group); + + def_pim_builtin ("vec_vmulesb", V8HI_type_node, VMULESB, pim_group); + + def_pim_builtin ("vec_vmulesh", V4SI_type_node, VMULESH, pim_group); + + def_pim_builtin ("vec_vmuleub", unsigned_V8HI_type_node, VMULEUB, pim_group); + + def_pim_builtin ("vec_vmuleuh", unsigned_V4SI_type_node, VMULEUH, pim_group); + + def_pim_builtin ("vec_vmulosb", V8HI_type_node, VMULOSB, pim_group); + + def_pim_builtin ("vec_vmulosh", V4SI_type_node, VMULOSH, pim_group); + + def_pim_builtin ("vec_vmuloub", unsigned_V8HI_type_node, VMULOUB, pim_group); + + def_pim_builtin ("vec_vmulouh", unsigned_V4SI_type_node, VMULOUH, pim_group); + + def_pim_builtin ("vec_vnmsubfp", V4SF_type_node, VNMSUBFP, pim_group); + + def_pim_builtin ("vec_vnor", NULL_TREE, VNOR, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vor", NULL_TREE, VOR, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vperm", V16QI_type_node, VPERM_4SI, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vpkpx", pixel_V8HI_type_node, VPKPX, pim_group); + + def_pim_builtin ("vec_vpkshss", V16QI_type_node, VPKSHSS, pim_group); + + def_pim_builtin ("vec_vpkshus", unsigned_V16QI_type_node, VPKSHUS, pim_group); + + def_pim_builtin ("vec_vpkswss", V8HI_type_node, VPKSWSS, pim_group); + + def_pim_builtin ("vec_vpkswus", unsigned_V8HI_type_node, VPKSWUS, pim_group); + + def_pim_builtin ("vec_vpkuhum", NULL_TREE, VPKUHUM, pim_rt_1h | pim_group); + + def_pim_builtin ("vec_vpkuhus", unsigned_V16QI_type_node, VPKUHUS, pim_group); + 
+ def_pim_builtin ("vec_vpkuwum", NULL_TREE, VPKUWUM, pim_rt_1h | pim_group); + + def_pim_builtin ("vec_vpkuwus", unsigned_V8HI_type_node, VPKUWUS, pim_group); + + def_pim_builtin ("vec_vrefp", V4SF_type_node, VREFP, pim_group); + + def_pim_builtin ("vec_vrfim", V4SF_type_node, VRFIM, pim_group); + + def_pim_builtin ("vec_vrfin", V4SF_type_node, VRFIN, pim_group); + + def_pim_builtin ("vec_vrfip", V4SF_type_node, VRFIP, pim_group); + + def_pim_builtin ("vec_vrfiz", V4SF_type_node, VRFIZ, pim_group); + + def_pim_builtin ("vec_vrlb", NULL_TREE, VRLB, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vrlh", NULL_TREE, VRLH, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vrlw", NULL_TREE, VRLW, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vrsqrtefp", V4SF_type_node, VRSQRTEFP, pim_group); + + def_pim_builtin ("vec_vsel", NULL_TREE, VSEL_4SI, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsl", NULL_TREE, VSL, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vslb", NULL_TREE, VSLB, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsldoi", NULL_TREE, VSLDOI_4SI, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vslh", NULL_TREE, VSLH, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vslo", NULL_TREE, VSLO, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vslw", NULL_TREE, VSLW, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vspltb", NULL_TREE, VSPLTB, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsplth", NULL_TREE, VSPLTH, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vspltisb", V16QI_type_node, VSPLTISB, pim_group); + + def_pim_builtin ("vec_vspltish", V8HI_type_node, VSPLTISH, pim_group); + + def_pim_builtin ("vec_vspltisw", V4SI_type_node, VSPLTISW, pim_group); + + def_pim_builtin ("vec_vspltw", NULL_TREE, VSPLTW, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsr", NULL_TREE, VSR, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsrab", NULL_TREE, VSRAB, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsrah", NULL_TREE, VSRAH, pim_rt_1 | pim_group); 
+ + def_pim_builtin ("vec_vsraw", NULL_TREE, VSRAW, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsrb", NULL_TREE, VSRB, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsrh", NULL_TREE, VSRH, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsro", NULL_TREE, VSRO, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsrw", NULL_TREE, VSRW, pim_rt_1 | pim_group); + + def_pim_builtin ("vec_vsubcuw", unsigned_V4SI_type_node, VSUBCUW, pim_group); + + def_pim_builtin ("vec_vsubfp", NULL_TREE, VSUBFP, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubsbs", NULL_TREE, VSUBSBS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubshs", NULL_TREE, VSUBSHS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubsws", NULL_TREE, VSUBSWS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsububm", NULL_TREE, VSUBUBM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsububs", NULL_TREE, VSUBUBS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubuhm", NULL_TREE, VSUBUHM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubuhs", NULL_TREE, VSUBUHS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubuwm", NULL_TREE, VSUBUWM, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsubuws", NULL_TREE, VSUBUWS, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_vsum4sbs", V4SI_type_node, VSUM4SBS, pim_group); + + def_pim_builtin ("vec_vsum4shs", V4SI_type_node, VSUM4SHS, pim_group); + + def_pim_builtin ("vec_vsum4ubs", unsigned_V4SI_type_node, VSUM4UBS, pim_group); + + def_pim_builtin ("vec_vsum2sws", V4SI_type_node, VSUM2SWS, pim_group); + + def_pim_builtin ("vec_vsumsws", V4SI_type_node, VSUMSWS, pim_group); + + def_pim_builtin ("vec_vupkhpx", NULL_TREE, VUPKHPX, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vupkhsb", NULL_TREE, VUPKHSB, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vupkhsh", NULL_TREE, VUPKHSH, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vupklpx", NULL_TREE, VUPKLPX, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vupklsb", NULL_TREE, 
VUPKLSB, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vupklsh", NULL_TREE, VUPKLSH, pim_rt_1d | pim_group); + + def_pim_builtin ("vec_vxor", NULL_TREE, VXOR, pim_rt_12 | pim_group); + + def_pim_builtin ("vec_xor", NULL_TREE, VXOR, pim_rt_12 | pim_group); + + /* PIM Predicates. */ + + gcc_assert (pim_code == ALTIVEC_PIM_VEC_ALL_EQ); + + def_pim_builtin ("vec_all_eq", integer_type_node, VCMPEQUB_P, pim_ovl_16 | pim_cr6_lt | pim_group); + def_pim_builtin ("vec_all_eq.2", integer_type_node, VCMPEQUH_P, pim_ovl_8 | pim_cr6_lt); + def_pim_builtin ("vec_all_eq.3", integer_type_node, VCMPEQFP_P, pim_ovl_4f | pim_cr6_lt); + def_pim_builtin ("vec_all_eq.4", integer_type_node, VCMPEQUW_P, pim_ovl_4 | pim_cr6_lt); + + def_pim_builtin ("vec_all_ge", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_manip_swap | pim_cr6_eq | pim_group); + def_pim_builtin ("vec_all_ge.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_manip_swap | pim_cr6_eq); + def_pim_builtin ("vec_all_ge.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_manip_swap | pim_cr6_eq); + def_pim_builtin ("vec_all_ge.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_manip_swap | pim_cr6_eq); + def_pim_builtin ("vec_all_ge.5", integer_type_node, VCMPGEFP_P, pim_ovl_4f | pim_cr6_lt); + def_pim_builtin ("vec_all_ge.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_manip_swap | pim_cr6_eq); + def_pim_builtin ("vec_all_ge.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_manip_swap | pim_cr6_eq); + + def_pim_builtin ("vec_all_gt", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_cr6_lt | pim_group); + def_pim_builtin ("vec_all_gt.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_cr6_lt); + def_pim_builtin ("vec_all_gt.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_cr6_lt); + def_pim_builtin ("vec_all_gt.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_cr6_lt); + def_pim_builtin ("vec_all_gt.5", integer_type_node, VCMPGTFP_P, pim_ovl_4f | pim_cr6_lt); + def_pim_builtin ("vec_all_gt.6", 
integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_cr6_lt); + def_pim_builtin ("vec_all_gt.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_cr6_lt); + + def_pim_builtin ("vec_all_in", integer_type_node, VCMPBFP_P, pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_le", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_cr6_eq | pim_group); + def_pim_builtin ("vec_all_le.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_cr6_eq); + def_pim_builtin ("vec_all_le.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_cr6_eq); + def_pim_builtin ("vec_all_le.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_cr6_eq); + def_pim_builtin ("vec_all_le.5", integer_type_node, VCMPGEFP_P, pim_ovl_4f | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_le.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_cr6_eq); + def_pim_builtin ("vec_all_le.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_cr6_eq); + + def_pim_builtin ("vec_all_lt", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_manip_swap | pim_cr6_lt | pim_group); + def_pim_builtin ("vec_all_lt.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_lt.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_lt.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_lt.5", integer_type_node, VCMPGTFP_P, pim_ovl_4f | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_lt.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_manip_swap | pim_cr6_lt); + def_pim_builtin ("vec_all_lt.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_manip_swap | pim_cr6_lt); + + def_pim_builtin ("vec_all_nan", integer_type_node, VCMPEQFP_P, pim_manip_dup | pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_ne", integer_type_node, VCMPEQUB_P, pim_ovl_16 | pim_cr6_eq | pim_group); + def_pim_builtin ("vec_all_ne.2", integer_type_node, VCMPEQUH_P, pim_ovl_8 | 
pim_cr6_eq); + def_pim_builtin ("vec_all_ne.3", integer_type_node, VCMPEQFP_P, pim_ovl_4f | pim_cr6_eq); + def_pim_builtin ("vec_all_ne.4", integer_type_node, VCMPEQUW_P, pim_ovl_4 | pim_cr6_eq); + + def_pim_builtin ("vec_all_nge", integer_type_node, VCMPGEFP_P, pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_ngt", integer_type_node, VCMPGTFP_P, pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_nle", integer_type_node, VCMPGEFP_P, pim_manip_swap | pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_nlt", integer_type_node, VCMPGEFP_P, pim_manip_swap | pim_cr6_eq | pim_group); + + def_pim_builtin ("vec_all_numeric", integer_type_node, VCMPEQFP_P, pim_manip_dup | pim_cr6_lt | pim_group); + + def_pim_builtin ("vec_any_eq", integer_type_node, VCMPEQUB_P, pim_ovl_16 | pim_cr6_ne | pim_group); + def_pim_builtin ("vec_any_eq.2", integer_type_node, VCMPEQUH_P, pim_ovl_8 | pim_cr6_ne); + def_pim_builtin ("vec_any_eq.3", integer_type_node, VCMPEQFP_P, pim_ovl_4f | pim_cr6_ne); + def_pim_builtin ("vec_any_eq.4", integer_type_node, VCMPEQUW_P, pim_ovl_4 | pim_cr6_ne); + + def_pim_builtin ("vec_any_ge", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_manip_swap | pim_cr6_ge | pim_group); + def_pim_builtin ("vec_any_ge.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_manip_swap | pim_cr6_ge); + def_pim_builtin ("vec_any_ge.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_manip_swap | pim_cr6_ge); + def_pim_builtin ("vec_any_ge.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_manip_swap | pim_cr6_ge); + def_pim_builtin ("vec_any_ge.5", integer_type_node, VCMPGEFP_P, pim_ovl_4f | pim_cr6_ne); + def_pim_builtin ("vec_any_ge.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_manip_swap | pim_cr6_ge); + def_pim_builtin ("vec_any_ge.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_manip_swap | pim_cr6_ge); + + def_pim_builtin ("vec_any_gt", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_cr6_ne | pim_group); + def_pim_builtin ("vec_any_gt.2", 
integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_cr6_ne); + def_pim_builtin ("vec_any_gt.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_cr6_ne); + def_pim_builtin ("vec_any_gt.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_cr6_ne); + def_pim_builtin ("vec_any_gt.5", integer_type_node, VCMPGTFP_P, pim_ovl_4f | pim_cr6_ne); + def_pim_builtin ("vec_any_gt.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_cr6_ne); + def_pim_builtin ("vec_any_gt.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_cr6_ne); + + def_pim_builtin ("vec_any_le", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_cr6_ge | pim_group); + def_pim_builtin ("vec_any_le.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_cr6_ge); + def_pim_builtin ("vec_any_le.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_cr6_ge); + def_pim_builtin ("vec_any_le.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_cr6_ge); + def_pim_builtin ("vec_any_le.5", integer_type_node, VCMPGEFP_P, pim_ovl_4f | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_le.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_cr6_ge); + def_pim_builtin ("vec_any_le.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_cr6_ge); + + def_pim_builtin ("vec_any_lt", integer_type_node, VCMPGTUB_P, pim_ovl_16u_16u | pim_manip_swap | pim_cr6_ne | pim_group); + def_pim_builtin ("vec_any_lt.2", integer_type_node, VCMPGTSB_P, pim_ovl_16 | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_lt.3", integer_type_node, VCMPGTUH_P, pim_ovl_8u_8u | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_lt.4", integer_type_node, VCMPGTSH_P, pim_ovl_8 | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_lt.5", integer_type_node, VCMPGTFP_P, pim_ovl_4f | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_lt.6", integer_type_node, VCMPGTUW_P, pim_ovl_4u_4u | pim_manip_swap | pim_cr6_ne); + def_pim_builtin ("vec_any_lt.7", integer_type_node, VCMPGTSW_P, pim_ovl_4 | pim_manip_swap | pim_cr6_ne); + + 
def_pim_builtin ("vec_any_nan", integer_type_node, VCMPEQFP_P, pim_manip_dup | pim_cr6_ge | pim_group); + + def_pim_builtin ("vec_any_ne", integer_type_node, VCMPEQUB_P, pim_ovl_16 | pim_cr6_ge | pim_group); + def_pim_builtin ("vec_any_ne.2", integer_type_node, VCMPEQUH_P, pim_ovl_8 | pim_cr6_ge); + def_pim_builtin ("vec_any_ne.3", integer_type_node, VCMPEQFP_P, pim_ovl_4f | pim_cr6_ge); + def_pim_builtin ("vec_any_ne.4", integer_type_node, VCMPEQUW_P, pim_ovl_4 | pim_cr6_ge); + + def_pim_builtin ("vec_any_nge", integer_type_node, VCMPGEFP_P, pim_cr6_ge | pim_group); + + def_pim_builtin ("vec_any_ngt", integer_type_node, VCMPGTFP_P, pim_cr6_ge | pim_group); + + def_pim_builtin ("vec_any_nle", integer_type_node, VCMPGEFP_P, pim_manip_swap | pim_cr6_ge | pim_group); + + def_pim_builtin ("vec_any_nlt", integer_type_node, VCMPGEFP_P, pim_manip_swap | pim_cr6_ge | pim_group); + + def_pim_builtin ("vec_any_numeric", integer_type_node, VCMPEQFP_P, pim_manip_dup | pim_cr6_ne | pim_group); + + def_pim_builtin ("vec_any_out", integer_type_node, VCMPBFP_P, pim_cr6_ne | pim_group); + + gcc_assert (pim_code == ALTIVEC_PIM__LAST + 1); + } + /* APPLE LOCAL end AltiVec */ } static void @@ -8925,6 +10703,8 @@ expand_block_clear (rtx operands[]) int offset; int clear_bytes; int clear_step; + /* APPLE LOCAL Altivec 3840704 */ + bool cpu_altivec = TARGET_ALTIVEC && ! flag_disable_opts_for_faltivec; /* If this is not a fixed size move, just call memcpy */ if (! constp) @@ -8940,11 +10720,23 @@ expand_block_clear (rtx operands[]) if (bytes <= 0) return 1; + /* APPLE LOCAL begin Altivec 3840704 */ + { + static bool warned; + if (flag_disable_opts_for_faltivec && align >= 128 && ! warned) + { + warned = true; + warning ("vectorised memset disabled due to use of -faltivec without -maltivec"); + } + } + /* APPLE LOCAL end Altivec 3840704 */ + /* Use the builtin memset after a point, to avoid huge code bloat. 
When optimize_size, avoid any significant code bloat; calling memset is about 4 instructions, so allow for one instruction to load zero and three to do clearing. */ - if (TARGET_ALTIVEC && align >= 128) + /* APPLE LOCAL Altivec 3840704 */ + if (cpu_altivec && align >= 128) clear_step = 16; else if (TARGET_POWERPC64 && align >= 32) clear_step = 8; @@ -8961,7 +10753,8 @@ expand_block_clear (rtx operands[]) enum machine_mode mode = BLKmode; rtx dest; - if (bytes >= 16 && TARGET_ALTIVEC && align >= 128) + /* APPLE LOCAL Altivec 3840704 */ + if (bytes >= 16 && cpu_altivec && align >= 128) { clear_bytes = 16; mode = V4SImode; @@ -9052,9 +10845,24 @@ expand_block_move (rtx operands[]) enum machine_mode mode = BLKmode; rtx src, dest; + /* APPLE LOCAL begin Altivec 3840704 */ + { + static bool warned; + if (flag_disable_opts_for_faltivec && bytes >= 16 && align >= 128 + && ! warned) + { + warned = true; + warning ("vectorised memcpy disabled due to use of -faltivec without -maltivec"); + } + } + /* APPLE LOCAL end Altivec 3840704 */ + /* Altivec first, since it will be faster than a string move when it applies, and usually not significantly larger. */ - if (TARGET_ALTIVEC && bytes >= 16 && align >= 128) + /* APPLE LOCAL begin Altivec 3840704 */ + if (TARGET_ALTIVEC && ! flag_disable_opts_for_faltivec + && bytes >= 16 && align >= 128) + /* APPLE LOCAL end Altivec 3840704 */ { move_bytes = 16; mode = V4SImode; @@ -10315,7 +12123,11 @@ rs6000_got_register (rtx value ATTRIBUTE_UNUSED) static struct machine_function * rs6000_init_machine_status (void) { - return ggc_alloc_cleared (sizeof (machine_function)); + /* APPLE LOCAL begin volatile pic base reg in leaves */ + machine_function *mf = (machine_function *) ggc_alloc_cleared (sizeof (machine_function)); + mf->substitute_pic_base_reg = INVALID_REGNUM; + return mf; + /* APPLE LOCAL end volatile pic base reg in leaves */ } /* These macros test for integers and extract the low-order bits. 
*/ @@ -10535,8 +12347,7 @@ print_operand (FILE *file, rtx x, int code) /* Bit 1 is EQ bit. */ i = 4 * (REGNO (x) - CR0_REGNO) + 2; - /* If we want bit 31, write a shift count of zero, not 32. */ - fprintf (file, "%d", i == 31 ? 0 : i + 1); + fprintf (file, "%d", i); return; case 'E': @@ -11341,7 +13152,7 @@ rs6000_generate_compare (enum rtx_code code) if ((TARGET_E500 && !TARGET_FPRS && TARGET_HARD_FLOAT) && rs6000_compare_fp_p) { - rtx cmp, or1, or2, or_result, compare_result2; + rtx cmp, or_result, compare_result2; enum machine_mode op_mode = GET_MODE (rs6000_compare_op0); if (op_mode == VOIDmode) @@ -11415,9 +13226,6 @@ rs6000_generate_compare (enum rtx_code code) default: abort (); } - or1 = gen_reg_rtx (SImode); - or2 = gen_reg_rtx (SImode); - or_result = gen_reg_rtx (CCEQmode); compare_result2 = gen_reg_rtx (CCFPmode); /* Do the EQ. */ @@ -11436,14 +13244,10 @@ rs6000_generate_compare (enum rtx_code code) else abort (); emit_insn (cmp); - or1 = gen_rtx_GT (SImode, compare_result, const0_rtx); - or2 = gen_rtx_GT (SImode, compare_result2, const0_rtx); - /* OR them together. */ - cmp = gen_rtx_SET (VOIDmode, or_result, - gen_rtx_COMPARE (CCEQmode, - gen_rtx_IOR (SImode, or1, or2), - const_true_rtx)); + or_result = gen_reg_rtx (CCFPmode); + cmp = gen_e500_cr_ior_compare (or_result, compare_result, + compare_result2); compare_result = or_result; code = EQ; } @@ -11553,9 +13357,9 @@ rs6000_emit_sCOND (enum rtx_code code, rtx result) abort (); if (cond_code == NE) - emit_insn (gen_e500_flip_eq_bit (t, t)); + emit_insn (gen_e500_flip_gt_bit (t, t)); - emit_insn (gen_move_from_CR_eq_bit (result, t)); + emit_insn (gen_move_from_CR_gt_bit (result, t)); return; } @@ -11736,9 +13540,9 @@ output_cbranch (rtx op, const char *label, int reversed, rtx insn) return string; } -/* Return the string to flip the EQ bit on a CR. */ +/* Return the string to flip the GT bit on a CR. 
*/ char * -output_e500_flip_eq_bit (rtx dst, rtx src) +output_e500_flip_gt_bit (rtx dst, rtx src) { static char string[64]; int a, b; @@ -11747,9 +13551,9 @@ output_e500_flip_eq_bit (rtx dst, rtx src) || GET_CODE (src) != REG || ! CR_REGNO_P (REGNO (src))) abort (); - /* EQ bit. */ - a = 4 * (REGNO (dst) - CR0_REGNO) + 2; - b = 4 * (REGNO (src) - CR0_REGNO) + 2; + /* GT bit. */ + a = 4 * (REGNO (dst) - CR0_REGNO) + 1; + b = 4 * (REGNO (src) - CR0_REGNO) + 1; sprintf (string, "crnot %d,%d", a, b); return string; @@ -11988,10 +13792,14 @@ rs6000_emit_vector_select (rtx dest, rtx op1, rtx op2, rtx mask) temp = gen_reg_rtx (dest_mode); + /* APPLE LOCAL begin AV */ + /* Fix op1 and op2 order. */ t = gen_rtx_fmt_ee (SET, VOIDmode, temp, gen_rtx_fmt_Ei (UNSPEC, dest_mode, - gen_rtvec (3, op1, op2, mask), + gen_rtvec (3, op2, op1, mask), vsel_insn_index)); + /* APPLE LOCAL end AV */ + emit_insn (t); emit_move_insn (dest, temp); return; @@ -12266,6 +14074,10 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) else target = emit_conditional_move (dest, c, op0, op1, mode, op1, op0, mode, 0); + /* APPLE LOCAL begin pragma fenv 3943021 */ + if (flag_trapping_math && target == NULL_RTX) + return; + /* APPLE LOCAL end pragma fenv 3943021 */ if (target == NULL_RTX) abort (); if (target != dest) @@ -12444,6 +14256,8 @@ first_reg_to_save (void) #if TARGET_MACHO if (flag_pic && current_function_uses_pic_offset_table + /* APPLE LOCAL volatile pic base reg in leaves */ + && cfun->machine->substitute_pic_base_reg == INVALID_REGNUM && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM) return RS6000_PIC_OFFSET_TABLE_REGNUM; #endif @@ -12755,9 +14569,8 @@ rs6000_stack_info (void) && !FP_SAVE_INLINE (info_ptr->first_fp_reg_save)) || info_ptr->first_altivec_reg_save <= LAST_ALTIVEC_REGNO || (DEFAULT_ABI == ABI_V4 && current_function_calls_alloca) - || (DEFAULT_ABI == ABI_DARWIN - && flag_pic - && current_function_uses_pic_offset_table) + /* APPLE LOCAL mainline */ + /* Test for 
flag_pic, abi, current_function deleted deliberately. */ || info_ptr->calls_p) { info_ptr->lr_save_p = 1; @@ -12916,7 +14729,8 @@ rs6000_stack_info (void) + ehrd_size + info_ptr->cr_size + info_ptr->lr_size - + info_ptr->vrsave_size + /* APPLE LOCAL fix redundant add? */ + /* FIXME: the FSF does "+ info_ptr->vrsave_size" here, shouldn't we? */ + info_ptr->toc_size, save_align); @@ -13203,16 +15017,33 @@ rs6000_return_addr (int count, rtx frame) } /* Say whether a function is a candidate for sibcall handling or not. - We do not allow indirect calls to be optimized into sibling calls. + APPLE LOCAL sibling calls + Also, we can't do it if there are any vector parameters; there's nowhere to put the VRsave code so it works; note that functions with vector parameters are required to have a prototype, so the argument type info must be available here. (The tail recursion case can work with vector parameters, but there's no way to distinguish here.) */ + +/* APPLE LOCAL begin sibling calls + On Darwin only, indirect calls may be sibcalls. This is enabled + primarily by target-specific logic in calls.c. + APPLE LOCAL end sibling calls */ static bool rs6000_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) { tree type; + /* APPLE LOCAL begin long-branch */ + if (TARGET_LONG_BRANCH) + return 0; + /* APPLE LOCAL end long-branch */ + + /* APPLE LOCAL begin indirect sibcalls */ + /* This goes with a lot of local changes in expand_call. */ + if (DEFAULT_ABI == ABI_DARWIN && !decl) + return true; + /* APPLE LOCAL end indirect sibcalls */ + if (decl) { if (TARGET_ALTIVEC_VRSAVE) @@ -13325,11 +15156,10 @@ rs6000_emit_load_toc_table (int fromprolog) rtx temp0 = (fromprolog ? 
gen_rtx_REG (Pmode, 0) : gen_reg_rtx (Pmode)); - rtx symF; if (fromprolog) { - rtx symL; + rtx symF, symL; ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); @@ -13347,14 +15177,9 @@ rs6000_emit_load_toc_table (int fromprolog) else { rtx tocsym; - static int reload_toc_labelno = 0; tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name); - - ASM_GENERATE_INTERNAL_LABEL (buf, "LCG", reload_toc_labelno++); - symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); - - emit_insn (gen_load_toc_v4_PIC_1b (tempLR, symF, tocsym)); + emit_insn (gen_load_toc_v4_PIC_1b (tempLR, tocsym)); emit_move_insn (dest, tempLR); emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest)); } @@ -13827,6 +15652,126 @@ generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep) return insn; } +/* APPLE LOCAL begin special ObjC method use of R12 */ +/* Determine whether a name is an ObjC method. */ + +static int name_encodes_objc_method_p (const char *piclabel_name) +{ + return (piclabel_name[0] == '+' || piclabel_name[0] == '-'); +} +/* APPLE LOCAL end special ObjC method use of R12 */ + +/* APPLE LOCAL begin recompute PIC register use */ +/* Sometimes a function has references that require the PIC register, + but optimization removes them all. To catch this case + recompute current_function_uses_pic_offset_table here. + This may allow us to eliminate the prologue and epilogue. 
*/ + +static int +recompute_PIC_register_use (void) +{ + if (DEFAULT_ABI == ABI_DARWIN + && flag_pic && current_function_uses_pic_offset_table + && !cfun->machine->ra_needs_full_frame) + { + rtx insn; + current_function_uses_pic_offset_table = 0; + push_topmost_sequence (); + for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn)) + if ( reg_mentioned_p (pic_offset_table_rtx, insn)) + { + current_function_uses_pic_offset_table = 1; + break; + } + pop_topmost_sequence (); + } + return 0; +} +/* APPLE LOCAL end recompute PIC register use */ + +/* APPLE LOCAL begin volatile pic base reg in leaves */ +/* If this is a leaf function and we used any pic-based references, + see if there is an unused volatile reg we can use instead of R31. + If so set substitute_pic_base_reg to this reg, set its reg_ever_used + bit (to avoid confusing later calls to alloc_volatile_reg), and + make a pass through the existing RTL, substituting the new reg for + the old one wherever it appears. + Logically this is a void function; it is int so it can be used to + initialize a dummy variable, thus getting executed ahead of other + initializations. Technicolour yawn. */ + +/* ALLOC_VOLATILE_REG allocates a volatile register AFTER all gcc + register allocations have been done; we use it to reserve an + unused reg for holding VRsave. Returns -1 in case of failure (all + volatile regs are in use.) */ +/* Note, this is called from both the prologue and epilogue code, + with the assumption that it will return the same result both + times! Since the register arrays are not changed in between + this is valid, if a bit fragile. */ +/* In future we may also use this to grab an unused volatile reg to + hold the PIC base reg in the event that the current function makes + no procedure calls; this was done in 2.95. 
*/ +static int +alloc_volatile_reg (void) +{ + if (current_function_is_leaf + && reload_completed + && !cfun->machine->ra_needs_full_frame) + { + int r; + for (r = 10; r >= 2; --r) + if (! fixed_regs[r] && ! regs_ever_live[r]) + return r; + } + + return -1; /* fail */ +} + +static int +try_leaf_pic_optimization (void) +{ + if ( DEFAULT_ABI==ABI_DARWIN + && flag_pic && current_function_uses_pic_offset_table + && current_function_is_leaf + && !cfun->machine->ra_needs_full_frame ) + { + int reg = alloc_volatile_reg (); + if ( reg != -1 ) + { + /* Run through the insns, changing references to the original + PIC_OFFSET_TABLE_REGNUM to our new one. */ + rtx insn; + const int nregs = PIC_OFFSET_TABLE_REGNUM + 1; + rtx *reg_map = (rtx *) xmalloc (nregs * sizeof (rtx)); + memset (reg_map, 0, nregs * sizeof (rtx)); + reg_map[PIC_OFFSET_TABLE_REGNUM] = gen_rtx_REG (SImode, reg); + + push_topmost_sequence (); + for (insn = get_insns (); insn != NULL; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN) + { + replace_regs (PATTERN (insn), reg_map, nregs, 1); + replace_regs (REG_NOTES (insn), reg_map, nregs, 1); + } + else if (GET_CODE (insn) == CALL_INSN) + { + if ( !SIBLING_CALL_P (insn)) + abort (); + } + } + pop_topmost_sequence (); + free (reg_map); + + regs_ever_live[reg] = 1; + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 0; + cfun->machine->substitute_pic_base_reg = reg; + } + } + return 0; +} +/* APPLE LOCAL end volatile pic base reg in leaves */ + /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes. Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */ @@ -13893,6 +15838,20 @@ gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset) return gen_rtx_MEM (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx)); } +/* APPLE LOCAL begin mainline */ +/* Look for user-defined global regs. We should not save and restore these, + and cannot use stmw/lmw if there are any in its range. 
*/ + +static bool +no_global_regs_above (int first_greg) +{ + int i; + for (i = 0; i < 32 - first_greg; i++) + if (global_regs[first_greg + i]) + return false; + return true; +} +/* APPLE LOCAL end mainline */ #ifndef TARGET_FIX_AND_CONTINUE #define TARGET_FIX_AND_CONTINUE 0 #endif @@ -13902,6 +15861,10 @@ gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset) void rs6000_emit_prologue (void) { + /* APPLE LOCAL recompute PIC register use */ + int dummy ATTRIBUTE_UNUSED = recompute_PIC_register_use (); + /* APPLE LOCAL volatile pic base reg in leaves */ + int ignored ATTRIBUTE_UNUSED = try_leaf_pic_optimization (); rs6000_stack_t *info = rs6000_stack_info (); enum machine_mode reg_mode = Pmode; int reg_size = TARGET_32BIT ? 4 : 8; @@ -13913,11 +15876,50 @@ rs6000_emit_prologue (void) int saving_FPRs_inline; int using_store_multiple; HOST_WIDE_INT sp_offset = 0; + /* APPLE LOCAL begin callers_lr_already_saved */ + int callers_lr_already_saved = 0; +#if TARGET_MACHO + int lr_already_set_up_for_pic = 0; +#endif + /* APPLE LOCAL end callers_lr_already_saved */ + /* APPLE LOCAL special ObjC method use of R12 */ + int objc_method_using_pic = 0; + + /* APPLE LOCAL begin special ObjC method use of R12 */ +#if TARGET_MACHO + if (DEFAULT_ABI == ABI_DARWIN + && current_function_uses_pic_offset_table && flag_pic + && current_function_decl + && DECL_ASSEMBLER_NAME_SET_P (current_function_decl)) + { + /* At -O0, this will not be set yet, so we won't do this opt. */ + const char *piclabel_name + = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)); + + if (name_encodes_objc_method_p (piclabel_name) + /* If we're saving vector or FP regs via a function call, + then don't bother with this ObjC R12 optimization. + This test also eliminates world_save. 
*/ + && (info->first_altivec_reg_save > LAST_ALTIVEC_REGNO + || VECTOR_SAVE_INLINE (info->first_altivec_reg_save)) + && (info->first_fp_reg_save == 64 + || FP_SAVE_INLINE (info->first_fp_reg_save))) + { + rtx lr = gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM); + rtx src = machopic_function_base_sym (); + objc_method_using_pic = 1; + rs6000_maybe_dead (emit_insn (gen_load_macho_picbase_label (lr, + src))); + } + } +#endif /* TARGET_MACHO */ + /* APPLE LOCAL end special ObjC method use of R12 */ if (TARGET_FIX_AND_CONTINUE) { + /* APPLE LOCAL begin mainline 5 nops */ /* gdb on darwin arranges to forward a function from the old - address by modifying the first 4 instructions of the function + address by modifying the first 5 instructions of the function to branch to the overriding function. This is necessary to permit function pointers that point to the old function to actually forward to the new function. */ @@ -13925,6 +15927,8 @@ rs6000_emit_prologue (void) emit_insn (gen_nop ()); emit_insn (gen_nop ()); emit_insn (gen_nop ()); + emit_insn (gen_nop ()); + /* APPLE LOCAL end mainline 5 nops */ } if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) @@ -13936,7 +15940,10 @@ rs6000_emit_prologue (void) using_store_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64 && (!TARGET_SPE_ABI || info->spe_64bit_regs_used == 0) - && info->first_gp_reg_save < 31); +/* APPLE LOCAL begin mainline */ + && info->first_gp_reg_save < 31 + && no_global_regs_above (info->first_gp_reg_save)); +/* APPLE LOCAL end mainline */ saving_FPRs_inline = (info->first_fp_reg_save == 64 || FP_SAVE_INLINE (info->first_fp_reg_save) || current_function_calls_eh_return @@ -14123,8 +16130,9 @@ rs6000_emit_prologue (void) used in this function, and do the corresponding magic in the epilogue. 
*/ + /* APPLE LOCAL begin mainline radar 4105210 */ if (TARGET_ALTIVEC && TARGET_ALTIVEC_VRSAVE - && !WORLD_SAVE_P (info) && info->vrsave_mask != 0) + && info->vrsave_mask != 0) { rtx reg, mem, vrsave; int offset; @@ -14139,19 +16147,23 @@ rs6000_emit_prologue (void) else emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave)); - /* Save VRSAVE. */ - offset = info->vrsave_save_offset + sp_offset; - mem - = gen_rtx_MEM (SImode, - gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (offset))); - set_mem_alias_set (mem, rs6000_sr_alias_set); - insn = emit_move_insn (mem, reg); + if (!WORLD_SAVE_P (info)) + { + /* Save VRSAVE. */ + offset = info->vrsave_save_offset + sp_offset; + mem + = gen_rtx_MEM (SImode, + gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (offset))); + set_mem_alias_set (mem, rs6000_sr_alias_set); + insn = emit_move_insn (mem, reg); + } /* Include the registers in the mask. */ emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask))); insn = emit_insn (generate_set_vrsave (reg, info, 0)); } + /* APPLE LOCAL end mainline radar 4105210 */ /* If we use the link register, get it into r0. */ if (!WORLD_SAVE_P (info) && info->lr_save_p) @@ -14166,7 +16178,12 @@ rs6000_emit_prologue (void) { rtx set; - cr_save_rtx = gen_rtx_REG (SImode, 12); + /* APPLE LOCAL begin special ObjC method use of R12 */ + /* For Darwin, use R2, so we don't clobber the special ObjC + method use of R12. R11 has a special meaning for Ada, so we + can't use that. */ + cr_save_rtx = gen_rtx_REG (SImode, DEFAULT_ABI == ABI_DARWIN ? 
2 : 12); + /* APPLE LOCAL end special ObjC method use of R12 */ insn = emit_insn (gen_movesi_from_cr (cr_save_rtx)); RTX_FRAME_RELATED_P (insn) = 1; /* Now, there's no way that dwarf2out_frame_debug_expr is going @@ -14201,17 +16218,62 @@ rs6000_emit_prologue (void) char rname[30]; const char *alloc_rname; rtvec p; - p = rtvec_alloc (2 + 64 - info->first_fp_reg_save); + /* APPLE LOCAL begin reduce code size */ + + int gen_following_label = 0; + int count = 0; + + if (current_function_uses_pic_offset_table && flag_pic +#ifdef INSN_SCHEDULING + /* Prevent the compiler from crashing + while scheduling insns after global_alloc! */ + && (optimize == 0 || !flag_schedule_insns_after_reload) +#endif + /* If this is the last CALL in the prolog, then we've got our PC. + If we're saving AltiVec regs via a function, we're not last. */ + && (info->first_altivec_reg_save > LAST_ALTIVEC_REGNO + || VECTOR_SAVE_INLINE (info->first_altivec_reg_save))) + gen_following_label = lr_already_set_up_for_pic = 1; + /* APPLE LOCAL end reduce code size */ + + /* APPLE LOCAL begin +2 (could be conditionalized) */ + p = rtvec_alloc (2 + 64 - info->first_fp_reg_save + 2 + + gen_following_label); + /* APPLE LOCAL end +2 (could be conditionalized) */ + + /* APPLE LOCAL begin reduce code size */ + /* 0 -> count++ */ + RTVEC_ELT (p, count++) = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (Pmode, + LINK_REGISTER_REGNUM)); +#if TARGET_MACHO + /* We have to calculate the offset into saveFP to where we must + call (!!) SAVEFP also saves the caller's LR -- placed into + R0 above -- into 8(R1). SAVEFP/RESTOREFP should never be + called to save or restore only F31. */ + + if (info->lr_save_offset != (POINTER_SIZE / 4) || info->first_fp_reg_save == 63) + abort (); - RTVEC_ELT (p, 0) = gen_rtx_CLOBBER (VOIDmode, - gen_rtx_REG (Pmode, - LINK_REGISTER_REGNUM)); + sprintf (rname, "*saveFP%s%.0d ; save f%d-f31", + (info->first_fp_reg_save - 32 == 14 ? 
"" : "+"), + (info->first_fp_reg_save - 46) * 4, + info->first_fp_reg_save - 32); +#else + /* APPLE LOCAL end reduce code size */ sprintf (rname, "%s%d%s", SAVE_FP_PREFIX, info->first_fp_reg_save - 32, SAVE_FP_SUFFIX); + /* APPLE LOCAL reduce code size */ +#endif /* TARGET_MACHO */ alloc_rname = ggc_strdup (rname); - RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, + /* APPLE LOCAL reduce code size */ + RTVEC_ELT (p, count++) = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname)); + /* APPLE LOCAL begin reduce code size */ + if (gen_following_label) + RTVEC_ELT (p, count++) = gen_rtx_USE (VOIDmode, const0_rtx); + /* APPLE LOCAL end reduce code size */ for (i = 0; i < 64 - info->first_fp_reg_save; i++) { rtx addr, reg, mem; @@ -14222,11 +16284,32 @@ rs6000_emit_prologue (void) mem = gen_rtx_MEM (DFmode, addr); set_mem_alias_set (mem, rs6000_sr_alias_set); - RTVEC_ELT (p, i + 2) = gen_rtx_SET (VOIDmode, mem, reg); + /* APPLE LOCAL reduce code size */ + RTVEC_ELT (p, count++) = gen_rtx_SET (VOIDmode, mem, reg); } + /* APPLE LOCAL begin C++ EH and setjmp (radar 2866661) */ +#if TARGET_MACHO + /* Darwin version of these functions stores R0. */ + RTVEC_ELT (p, count++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0)); + + /* If we saved LR, *tell* people about it! */ + if (info->lr_save_p) + { + rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (info->lr_save_offset + sp_offset)); + rtx mem = gen_rtx_MEM (Pmode, addr); + /* This should not be of rs6000_sr_alias_set, because of + __builtin_return_address. */ + RTVEC_ELT (p, count++) = gen_rtx_SET (Pmode, mem, + gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM)); + } +#endif + /* APPLE LOCAL end C++ EH and setjmp (radar 2866661) */ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_ptr_rtx, info->total_size, NULL_RTX, NULL_RTX); + /* APPLE LOCAL callers_lr_already_saved */ + callers_lr_already_saved = 1; } /* Save GPRs. 
This is done as a PARALLEL if we are using @@ -14257,14 +16340,22 @@ rs6000_emit_prologue (void) { int i; for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if ((regs_ever_live[info->first_gp_reg_save+i] - && (! call_used_regs[info->first_gp_reg_save+i] - || (i+info->first_gp_reg_save +/* APPLE LOCAL begin mainline cosmetics */ + if ((regs_ever_live[info->first_gp_reg_save + i] + && (!call_used_regs[info->first_gp_reg_save + i] + || (i + info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM && TARGET_TOC && TARGET_MINIMAL_TOC))) - || (i+info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM + || (i + info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM +/* APPLE LOCAL end mainline cosmetics */ && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0) - || (DEFAULT_ABI == ABI_DARWIN && flag_pic)))) + /* APPLE LOCAL begin volatile pic base reg in leaves */ + || (DEFAULT_ABI == ABI_DARWIN && flag_pic + && ((current_function_uses_pic_offset_table + && cfun->machine->substitute_pic_base_reg + == INVALID_REGNUM) + || cfun->machine->ra_needs_full_frame))))) + /* APPLE LOCAL end volatile pic base reg in leaves */ { rtx addr, reg, mem; reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); @@ -14346,8 +16437,20 @@ rs6000_emit_prologue (void) } } + /* APPLE LOCAL begin special ObjC method use of R12 */ + if (objc_method_using_pic) + rs6000_maybe_dead ( + emit_move_insn (gen_rtx_REG (Pmode, + cfun->machine->substitute_pic_base_reg + == INVALID_REGNUM + ? PIC_OFFSET_TABLE_REGNUM + : cfun->machine->substitute_pic_base_reg), + gen_rtx_REG (Pmode, 12))); + /* APPLE LOCAL end special ObjC method use of R12 */ + /* Save lr if we used it. 
*/ - if (!WORLD_SAVE_P (info) && info->lr_save_p) + /* APPLE LOCAL callers_lr_already_saved */ + if (!WORLD_SAVE_P (info) && info->lr_save_p && !callers_lr_already_saved) { rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (info->lr_save_offset + sp_offset)); @@ -14441,17 +16544,37 @@ rs6000_emit_prologue (void) #if TARGET_MACHO if (DEFAULT_ABI == ABI_DARWIN + /* APPLE LOCAL special ObjC method use of R12 */ + && !objc_method_using_pic && flag_pic && current_function_uses_pic_offset_table) { rtx lr = gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM); rtx src = machopic_function_base_sym (); - rs6000_maybe_dead (emit_insn (gen_load_macho_picbase (lr, src))); + /* APPLE LOCAL begin save and restore LR mainline */ + /* Save and restore LR locally around this call (in R0). */ + if (!info->lr_save_p) + rs6000_maybe_dead (emit_move_insn (gen_rtx_REG (Pmode, 0), lr)); + /* APPLE LOCAL end save and restore LR mainline */ + + /* APPLE LOCAL begin performance enhancement */ + if (!lr_already_set_up_for_pic) + rs6000_maybe_dead (emit_insn (gen_load_macho_picbase (lr, src))); + /* APPLE LOCAL end performance enhancement */ + /* APPLE LOCAL begin volatile pic base reg in leaves */ insn = emit_move_insn (gen_rtx_REG (Pmode, - RS6000_PIC_OFFSET_TABLE_REGNUM), + (cfun->machine->substitute_pic_base_reg + == INVALID_REGNUM) + ? RS6000_PIC_OFFSET_TABLE_REGNUM + : cfun->machine->substitute_pic_base_reg), lr); rs6000_maybe_dead (insn); + /* APPLE LOCAL end volatile pic base reg in leaves */ + /* APPLE LOCAL begin save and restore LR mainline */ + if (!info->lr_save_p) + rs6000_maybe_dead (emit_move_insn (lr, gen_rtx_REG (Pmode, 0))); + /* APPLE LOCAL end save and restore LR mainline */ } #endif } @@ -14467,6 +16590,8 @@ rs6000_output_function_prologue (FILE *file, if (TARGET_DEBUG_STACK) debug_stack_info (info); + /* APPLE LOCAL do not extern fp save/restore */ +#if !TARGET_MACHO /* Write .extern for any function we will call to save and restore fp values. 
*/ if (info->first_fp_reg_save < 64 @@ -14475,6 +16600,8 @@ rs6000_output_function_prologue (FILE *file, SAVE_FP_PREFIX, info->first_fp_reg_save - 32, SAVE_FP_SUFFIX, RESTORE_FP_PREFIX, info->first_fp_reg_save - 32, RESTORE_FP_SUFFIX); + /* APPLE LOCAL do not extern fp save/restore */ +#endif /* !TARGET_MACHO */ /* Write .extern for AIX common mode routines, if needed. */ if (! TARGET_POWER && ! TARGET_POWERPC && ! common_mode_defined) @@ -14551,7 +16678,10 @@ rs6000_emit_epilogue (int sibcall) using_load_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64 && (!TARGET_SPE_ABI || info->spe_64bit_regs_used == 0) - && info->first_gp_reg_save < 31); +/* APPLE LOCAL begin mainline */ + && info->first_gp_reg_save < 31 + && no_global_regs_above (info->first_gp_reg_save)); +/* APPLE LOCAL end mainline */ restoring_FPRs_inline = (sibcall || current_function_calls_eh_return || info->first_fp_reg_save == 64 @@ -14562,6 +16692,8 @@ rs6000_emit_epilogue (int sibcall) using_mfcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 || rs6000_cpu == PROCESSOR_PPC750 + /* APPLE LOCAL ? */ + || rs6000_cpu == PROCESSOR_PPC7400 || optimize_size); if (WORLD_SAVE_P (info)) @@ -14749,7 +16881,10 @@ rs6000_emit_epilogue (int sibcall) set_mem_alias_set (mem, rs6000_sr_alias_set); - emit_move_insn (gen_rtx_REG (SImode, 12), mem); + /* APPLE LOCAL begin use R11 because of ObjC use of R12 in sibcall to CTR */ + emit_move_insn (gen_rtx_REG (SImode, + DEFAULT_ABI == ABI_DARWIN ? 11 : 12), mem); + /* APPLE LOCAL end use R11 because of ObjC use of R12 in sibcall to CTR */ } /* Set LR here to try to overlap restores below. */ @@ -14815,13 +16950,22 @@ rs6000_emit_epilogue (int sibcall) } else for (i = 0; i < 32 - info->first_gp_reg_save; i++) - if ((regs_ever_live[info->first_gp_reg_save+i] - && (! 
call_used_regs[info->first_gp_reg_save+i] - || (i+info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM +/* APPLE LOCAL begin mainline cosmetics */ + if ((regs_ever_live[info->first_gp_reg_save + i] + && (!call_used_regs[info->first_gp_reg_save + i] + || (i + info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM && TARGET_TOC && TARGET_MINIMAL_TOC))) - || (i+info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM + || (i + info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM +/* APPLE LOCAL end mainline cosmetics */ && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0) - || (DEFAULT_ABI == ABI_DARWIN && flag_pic)))) + /* APPLE LOCAL begin darwin native */ + || (DEFAULT_ABI == ABI_DARWIN && flag_pic + && ((current_function_uses_pic_offset_table + && cfun->machine->substitute_pic_base_reg + == INVALID_REGNUM) + || cfun->machine->ra_needs_full_frame))))) + + /* APPLE LOCAL end darwin native */ { rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (info->gp_save_offset @@ -14875,7 +17019,9 @@ rs6000_emit_epilogue (int sibcall) /* If we saved cr, restore it here. Just those that were used. */ if (info->cr_save_p) { - rtx r12_rtx = gen_rtx_REG (SImode, 12); + /* APPLE LOCAL use R11 because of ObjC use of R12 in sibcall to CTR */ + /* APPLE LOCAL silly name retained to minimize deviation from FSF */ + rtx r12_rtx = gen_rtx_REG (SImode, DEFAULT_ABI == ABI_DARWIN ? 11 : 12); int count = 0; if (using_mfcr_multiple) @@ -14975,8 +17121,25 @@ rs6000_emit_epilogue (int sibcall) char rname[30]; const char *alloc_rname; + /* APPLE LOCAL begin Reduce code size / improve performance */ +#if TARGET_MACHO + /* We have to calculate the offset into RESTFP to where we must + call (!!) RESTFP also restores the caller's LR from 8(R1). + RESTFP should *never* be called to restore only F31. */ + + if (info->lr_save_offset != (POINTER_SIZE / 4) || info->first_fp_reg_save == 63) + abort (); + + sprintf (rname, "*restFP%s%.0d ; restore f%d-f31", + (info->first_fp_reg_save - 32 == 14 ? 
"" : "+"), + (info->first_fp_reg_save - 46) * 4, + info->first_fp_reg_save - 32); +#else + /* APPLE LOCAL end Reduce code size / improve performance */ sprintf (rname, "%s%d%s", RESTORE_FP_PREFIX, info->first_fp_reg_save - 32, RESTORE_FP_SUFFIX); + /* APPLE LOCAL Reduce code size / improve performance */ +#endif /* TARGET_MACHO */ alloc_rname = ggc_strdup (rname); RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, @@ -16585,8 +18748,29 @@ rs6000_is_costly_dependence (rtx insn, rtx next, rtx link, int cost, && (!link || (int) REG_NOTE_KIND (link) == 0)) /* Prevent load after store in the same group if it is a true dependence. */ - return true; - + /* APPLE LOCAL begin nop on true-dependence. */ + { + if (GET_CODE (PATTERN (next)) == SET && GET_CODE (PATTERN (insn)) == SET) + { + rtx load_mem = SET_SRC (PATTERN (next)); + rtx sto_mem = SET_DEST (PATTERN (insn)); + /* Strip any extension wrappers to get at the MEMs themselves. */ + if (GET_CODE (load_mem) == ZERO_EXTEND + || GET_CODE (load_mem) == SIGN_EXTEND) + load_mem = XEXP (load_mem, 0); + if (GET_CODE (sto_mem) == ZERO_EXTEND + || GET_CODE (sto_mem) == SIGN_EXTEND) + sto_mem = XEXP (sto_mem, 0); + if (GET_CODE (load_mem) == MEM && GET_CODE (sto_mem) == MEM) + /* Only consider those true-dependence cases where the memory conflict + can be determined. Exclude cases where true-dependence was + decided because memory conflict could not be determined from + aliasing info. */ + return must_true_dependence (load_mem, sto_mem); + } + return true; + } + /* APPLE LOCAL end nop on true-dependence. */ + /* The flag is set to X; dependences with latency >= X are considered costly, and will not be scheduled in the same group. */ if (rs6000_sched_costly_dep <= max_dep_latency @@ -17166,6 +19350,13 @@ rs6000_handle_altivec_attribute (tree *node, switch (altivec_type) { + /* APPLE LOCAL begin AltiVec */ + case 'e': + /* Return the constituent element type. */ + result = (ALTIVEC_VECTOR_MODE (mode) ? 
TREE_TYPE (type) : type); + break; + /* APPLE LOCAL end AltiVec */ + case 'v': unsigned_p = TYPE_UNSIGNED (type); switch (mode) @@ -17206,8 +19397,12 @@ rs6000_handle_altivec_attribute (tree *node, default: break; } - if (result && result != type && TYPE_READONLY (type)) - result = build_qualified_type (result, TYPE_QUAL_CONST); + /* APPLE LOCAL begin AltiVec */ + /* Propagate qualifiers attached to the element type + onto the vector type. */ + if (result && result != type && TYPE_QUALS (type)) + result = build_qualified_type (result, TYPE_QUALS (type)); + /* APPLE LOCAL end AltiVec */ *no_add_attrs = true; /* No need to hang on to the attribute. */ @@ -17516,9 +19711,11 @@ macho_branch_islands (void) strcat (tmp_buf, label); strcat (tmp_buf, "_pic\n"); strcat (tmp_buf, label); - strcat (tmp_buf, "_pic:\n\tmflr r11\n"); + /* APPLE LOCAL indirect calls in R12 */ + strcat (tmp_buf, "_pic:\n\tmflr r12\n"); - strcat (tmp_buf, "\taddis r11,r11,ha16("); + /* APPLE LOCAL indirect calls in R12 */ + strcat (tmp_buf, "\taddis r12,r12,ha16("); strcat (tmp_buf, name_buf); strcat (tmp_buf, " - "); strcat (tmp_buf, label); @@ -17526,7 +19723,8 @@ macho_branch_islands (void) strcat (tmp_buf, "\tmtlr r0\n"); - strcat (tmp_buf, "\taddi r12,r11,lo16("); + /* APPLE LOCAL indirect calls in R12 */ + strcat (tmp_buf, "\taddi r12,r12,lo16("); strcat (tmp_buf, name_buf); strcat (tmp_buf, " - "); strcat (tmp_buf, label); @@ -17592,12 +19790,59 @@ output_call (rtx insn, rtx *operands, int dest_operand_number, int cookie_operand_number) { static char buf[256]; + /* APPLE LOCAL begin long-branch */ + const char *far_call_instr_str=NULL, *near_call_instr_str=NULL; + rtx pattern; + + switch (GET_CODE (insn)) + { + case CALL_INSN: + far_call_instr_str = "jbsr"; + near_call_instr_str = "bl"; + pattern = NULL_RTX; + break; + case JUMP_INSN: + far_call_instr_str = "jmp"; + near_call_instr_str = "b"; + pattern = NULL_RTX; + break; + case INSN: + pattern = PATTERN (insn); + break; + default: + 
gcc_unreachable (); + break; + } + /* APPLE LOCAL end long-branch */ + if (GET_CODE (operands[dest_operand_number]) == SYMBOL_REF && (INTVAL (operands[cookie_operand_number]) & CALL_LONG)) { tree labelname; tree funname = get_identifier (XSTR (operands[dest_operand_number], 0)); + /* APPLE LOCAL begin long-branch */ + /* This insn represents a prologue or epilogue. */ + if ((pattern != NULL_RTX) && GET_CODE (pattern) == PARALLEL) + { + rtx parallel_first_op = XVECEXP (pattern, 0, 0); + switch (GET_CODE (parallel_first_op)) + { + case CLOBBER: /* Prologue: a call to save_world. */ + far_call_instr_str = "jbsr"; + near_call_instr_str = "bl"; + break; + case RETURN: /* Epilogue: a call to rest_world. */ + far_call_instr_str = "jmp"; + near_call_instr_str = "b"; + break; + default: + abort(); + break; + } + } + /* APPLE LOCAL end long-branch */ + if (no_previous_def (funname)) { int line_number = 0; @@ -17607,7 +19852,13 @@ output_call (rtx insn, rtx *operands, int dest_operand_number, CODE_LABEL_NUMBER (label_rtx)); label_buf = temp_buf[0] == '*' ? 
temp_buf + 1 : temp_buf; labelname = get_identifier (label_buf); - for (; insn && GET_CODE (insn) != NOTE; insn = PREV_INSN (insn)); + /* APPLE LOCAL begin 3910248, 3915171 */ + for (; + insn && (GET_CODE (insn) != NOTE + || NOTE_LINE_NUMBER (insn) < 0); + insn = PREV_INSN (insn)) + ; + /* APPLE LOCAL end 3910248, 3915171 */ if (insn) line_number = NOTE_LINE_NUMBER (insn); add_compiler_branch_island (labelname, funname, line_number); @@ -17783,6 +20034,8 @@ rs6000_darwin_file_start (void) { "power4", "ppc970", 0 }, { "G5", "ppc970", 0 }, { "7450", "ppc7450", 0 }, + /* APPLE LOCAL radar 4161346 */ + { "ppc", "ppc", MASK_PIM_ALTIVEC }, { "7400", "ppc7400", MASK_ALTIVEC }, { "G4", "ppc7400", 0 }, { "750", "ppc750", 0 }, @@ -17820,6 +20073,130 @@ rs6000_darwin_file_start (void) #endif /* TARGET_MACHO */ +/* APPLE LOCAL begin Macintosh alignment 2002-1-22 --ff */ +/* Return the alignment of a struct based on the Macintosh PowerPC + alignment rules. In general the alignment of a struct is + determined by the greatest alignment of its elements. However, the + PowerPC rules cause the alignment of a struct to peg at word + alignment except when the first field has greater than word + (32-bit) alignment, in which case the alignment is determined by + the alignment of the first field. */ + +unsigned +round_type_align (tree the_struct, unsigned computed, unsigned specified) +{ + if (TREE_CODE (the_struct) == VECTOR_TYPE + && ALTIVEC_VECTOR_MODE (TYPE_MODE (the_struct))) + { + /* All vectors are (at least) 16-byte aligned. A struct or + union with a vector element is also 16-byte aligned. */ + return MAX (RS6000_VECTOR_ALIGNMENT, MAX (computed, specified)); + } + + if (TREE_CODE (the_struct) == RECORD_TYPE + || TREE_CODE (the_struct) == UNION_TYPE + || TREE_CODE (the_struct) == QUAL_UNION_TYPE) + { + tree first_field = TYPE_FIELDS (the_struct); + + /* Skip past static fields, enums, and constant fields that are + not really a part of the record layout. 
*/ + while ((first_field != 0) + && (TREE_CODE (first_field) != FIELD_DECL)) + first_field = TREE_CHAIN (first_field); + + if (first_field != 0) + { + /* If other-than-default alignment (which includes mac68k + mode) is in effect, then no adjustments to the alignment + should be necessary. Ditto if the struct has the + __packed__ attribute. */ + if (TYPE_PACKED (the_struct) || TARGET_ALIGN_MAC68K + || TARGET_ALIGN_NATURAL || maximum_field_alignment != 0) + /* Do nothing */ ; + else + { + /* The following code handles Macintosh PowerPC + alignment. The implementation is complicated by the + fact that BIGGEST_ALIGNMENT is 128 when AltiVec is + enabled and 32 when it is not. So when AltiVec is + not enabled, alignment is generally limited to word + alignment. Consequently, the alignment of unions has + to be recalculated if AltiVec is not enabled. + + Below we explicitly test for fields with greater than + word alignment: doubles, long longs, and structs and + arrays with greater than word alignment. */ + unsigned val; + tree field_type; + + val = MAX (computed, specified); + + if (TREE_CODE (the_struct) == UNION_TYPE && !TARGET_ALTIVEC) + { + tree field = first_field; + + while (field != 0) + { + /* Don't consider statics, enums and constant fields + which are not really a part of the record. 
*/ + if (TREE_CODE (field) != FIELD_DECL) + { + field = TREE_CHAIN (field); + continue; + } + field_type = TREE_TYPE(field); + if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE) + field_type = get_inner_array_type (field); + else + field_type = TREE_TYPE (field); + val = MAX (TYPE_ALIGN (field_type), val); + if (FLOAT_TYPE_P (field_type) + && TYPE_MODE (field_type) == DFmode) + val = MAX (RS6000_DOUBLE_ALIGNMENT, val); + else if (INTEGRAL_TYPE_P (field_type) + && TYPE_MODE (field_type) == DImode) + val = MAX (RS6000_LONGLONG_ALIGNMENT, val); + field = TREE_CHAIN (field); + } + } + else + { + if (TREE_CODE (TREE_TYPE (first_field)) == ARRAY_TYPE) + field_type = get_inner_array_type (first_field); + else + field_type = TREE_TYPE (first_field); + + if (field_type == error_mark_node) + return val; + val = MAX (TYPE_ALIGN (field_type), val); + + if (FLOAT_TYPE_P (field_type) + && TYPE_MODE (field_type) == DFmode) + val = MAX (RS6000_DOUBLE_ALIGNMENT, val); + else if (INTEGRAL_TYPE_P (field_type) + && TYPE_MODE (field_type) == DImode) + val = MAX (RS6000_LONGLONG_ALIGNMENT, val); + } + + return val; + } + } /* first_field != 0 */ + + /* Ensure all MAC68K structs are at least 16-bit aligned. + Unless the struct has __attribute__ ((packed)). */ + + if (TARGET_ALIGN_MAC68K && ! 
TYPE_PACKED (the_struct)) + { + if (computed < 16) + computed = 16; + } + } /* RECORD_TYPE, etc */ + + return (MAX (computed, specified)); +} +/* APPLE LOCAL end Macintosh alignment 2002-1-22 --ff */ + #if TARGET_ELF static unsigned int rs6000_elf_section_type_flags (tree decl, const char *name, int reloc) @@ -18142,8 +20519,24 @@ rs6000_xcoff_file_end (void) static bool rs6000_binds_local_p (tree decl) { - return default_binds_local_p_1 (decl, 0); + /* APPLE LOCAL begin kext treat vtables as overridable */ + return default_binds_local_p_1 (decl, + flag_apple_kext && lang_hooks.vtable_p (decl)); } +/* APPLE LOCAL end kext treat vtables as overridable */ + +/* APPLE LOCAL begin pragma reverse_bitfields */ +/* Pragma reverse_bitfields. For compatibility with CW. + This feature is not well defined by CW, and results in + code that does not work in some cases! Bug compatibility + is the requirement, however. */ + +static bool +rs6000_reverse_bitfields_p (tree record_type ATTRIBUTE_UNUSED) +{ + return darwin_reverse_bitfields; +} +/* APPLE LOCAL end prgama reverse_bitfields */ #endif /* Compute a (partial) cost for rtx X. Return true if the complete @@ -18605,128 +20998,6 @@ rs6000_complex_function_value (enum machine_mode mode) return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); } -/* Compose a PARALLEL for a darwin64 struct being returned by - value. 
*/ - -static rtx -rs6000_darwin64_function_value (CUMULATIVE_ARGS *cum, tree valtype) -{ - tree f, ftype; - rtx rvec[FIRST_PSEUDO_REGISTER], sub, roffset, suboff; - int k = 0, bytepos, tot, elt, i, subbytepos; - enum machine_mode fmode; - - switch (TREE_CODE (valtype)) - { - case RECORD_TYPE: - for (f = TYPE_FIELDS (valtype); f ; f = TREE_CHAIN (f)) - if (TREE_CODE (f) == FIELD_DECL) - { - ftype = TREE_TYPE (f); - fmode = TYPE_MODE (ftype); - bytepos = int_bit_position (f) / BITS_PER_UNIT; - if (USE_FP_FOR_ARG_P (cum, fmode, ftype)) - { - sub = gen_rtx_REG (fmode, cum->fregno++); - cum->sysv_gregno++; - } - else if (USE_ALTIVEC_FOR_ARG_P (cum, fmode, ftype, 1)) - { - sub = gen_rtx_REG (fmode, cum->vregno++); - cum->sysv_gregno++; - } - else if (fmode == BLKmode - && (TREE_CODE (ftype) == RECORD_TYPE - || TREE_CODE (ftype) == ARRAY_TYPE)) - sub = rs6000_darwin64_function_value (cum, ftype); - else - sub = gen_rtx_REG (fmode, cum->sysv_gregno++); - if (sub == NULL_RTX) - return sub; - else if (GET_CODE (sub) == PARALLEL) - { - for (i = 0; i < XVECLEN (sub, 0); i++) - { - rtx subsub = XVECEXP (sub, 0, i); - - suboff = XEXP (subsub, 1); - subbytepos = INTVAL (suboff); - subbytepos += bytepos; - roffset = gen_rtx_CONST_INT (SImode, subbytepos); - subsub = XEXP (subsub, 0); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, subsub, roffset); - } - } - else - { - roffset = gen_rtx_CONST_INT (SImode, bytepos); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, sub, roffset); - } - } - if (k > 0) - return gen_rtx_PARALLEL (TYPE_MODE (valtype), gen_rtvec_v (k, rvec)); - else - return NULL_RTX; - - case ARRAY_TYPE: - /* If passing by value won't work, give up. 
*/ - if (int_size_in_bytes (valtype) <= 0) - return NULL_RTX; - ftype = TREE_TYPE (valtype); - fmode = TYPE_MODE (ftype); - tot = int_size_in_bytes (valtype) / int_size_in_bytes (ftype); - bytepos = 0; - for (elt = 0; elt < tot; ++elt) - { - if (USE_FP_FOR_ARG_P (cum, fmode, ftype)) - { - sub = gen_rtx_REG (fmode, cum->fregno++); - cum->sysv_gregno++; - } - else if (USE_ALTIVEC_FOR_ARG_P (cum, fmode, ftype, 1)) - { - sub = gen_rtx_REG (fmode, cum->vregno++); - cum->sysv_gregno++; - } - else if (fmode == BLKmode - && (TREE_CODE (ftype) == RECORD_TYPE - || TREE_CODE (ftype) == ARRAY_TYPE)) - sub = rs6000_darwin64_function_value (cum, ftype); - else - sub = gen_rtx_REG (fmode, cum->sysv_gregno++); - if (sub == NULL_RTX) - return sub; - else if (GET_CODE (sub) == PARALLEL) - { - for (i = 0; i < XVECLEN (sub, 0); i++) - { - rtx subsub = XVECEXP (sub, 0, i); - - suboff = XEXP (subsub, 1); - subbytepos = INTVAL (suboff); - subbytepos += bytepos; - roffset = gen_rtx_CONST_INT (SImode, subbytepos); - subsub = XEXP (subsub, 0); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, subsub, roffset); - } - } - else - { - roffset = gen_rtx_CONST_INT (SImode, bytepos); - rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, sub, roffset); - } - bytepos += int_size_in_bytes (ftype); - } - if (k > 0) - return gen_rtx_PARALLEL (TYPE_MODE (valtype), gen_rtvec_v (k, rvec)); - else - return NULL_RTX; - - default: - abort (); - } -} - /* Define how to find the value returned by a function. VALTYPE is the data type of the value (as a tree). If the precise function being called is known, FUNC is its FUNCTION_DECL; @@ -18746,16 +21017,18 @@ rs6000_function_value (tree valtype, tree func ATTRIBUTE_UNUSED) /* Special handling for structs in darwin64. 
*/ if (rs6000_darwin64_abi && TYPE_MODE (valtype) == BLKmode - && (TREE_CODE (valtype) == RECORD_TYPE - || TREE_CODE (valtype) == ARRAY_TYPE)) + && TREE_CODE (valtype) == RECORD_TYPE + && int_size_in_bytes (valtype) > 0) { CUMULATIVE_ARGS valcum; rtx valret; - valcum.sysv_gregno = GP_ARG_RETURN; + valcum.words = 0; valcum.fregno = FP_ARG_MIN_REG; valcum.vregno = ALTIVEC_ARG_MIN_REG; - valret = rs6000_darwin64_function_value (&valcum, valtype); + /* Do a trial code generation as if this were going to be passed as + an argument; if any part goes in memory, we return NULL. */ + valret = rs6000_darwin64_record_arg (&valcum, valtype, 1, true); if (valret) return valret; /* Otherwise fall through to standard ABI rules. */ @@ -18934,6 +21207,43 @@ rs6000_dbx_register_number (unsigned int regno) abort (); } +/* APPLE LOCAL begin CW asm blocks */ +/* Translate some register names seen in CW asm into GCC standard + forms. */ + +const char * +rs6000_cw_asm_register_name (const char *regname, char *buf) +{ + /* SP is a valid reg name, but asm doesn't like it yet, so translate. */ + if (strcmp (regname, "sp") == 0) + return "r1"; + if (decode_reg_name (regname) >= 0) + return regname; + /* Change "gpr0" to "r0". */ + if (regname[0] == 'g' + && regname[1] == 'p' + && decode_reg_name (regname + 2) >= 0) + return regname + 2; + /* Change "fp0" to "f0". */ + if (regname[0] == 'f' && regname[1] == 'p') + { + buf[0] = 'f'; + strcpy (buf + 1, regname + 2); + if (decode_reg_name (buf) >= 0) + return buf; + } + if (regname[0] == 's' + && regname[1] == 'p' + && regname[2] == 'r' + ) + /* Temp hack, return it as a number. 
*/ + return regname + 3; + if (strcmp (regname, "RTOC") == 0) + return "r2"; + return NULL; +} +/* APPLE LOCAL end CW asm blocks */ + /* target hook eh_return_filter_mode */ static enum machine_mode rs6000_eh_return_filter_mode (void) @@ -18955,5 +21265,21 @@ rs6000_vector_mode_supported_p (enum machine_mode mode) else return false; } +/* APPLE LOCAL begin mainline 2005-04-14 */ + +/* Target hook for invalid_arg_for_unprototyped_fn. */ +static const char * +invalid_arg_for_unprototyped_fn (tree typelist, tree funcdecl, tree val) +{ + return (!rs6000_darwin64_abi + && typelist == 0 + && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE + && (funcdecl == NULL_TREE + || (TREE_CODE (funcdecl) == FUNCTION_DECL + && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) + ? N_("AltiVec argument passed to unprototyped function") + : NULL; +} +/* APPLE LOCAL end mainline 2005-04-14 */ #include "gt-rs6000.h" |