diff options
28 files changed, 3511 insertions, 686 deletions
diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner new file mode 100644 index 00000000000..d83c79fcf21 --- /dev/null +++ b/gcc/ChangeLog.meissner @@ -0,0 +1,227 @@ +2015-02-06 Michael Meissner <meissner@linux.vnet.ibm.com> + + Merge up to ibm/gcc-4_9-branch, subversion id 220484. + * REVISION: Update subversion id. + +2014-12-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + Merge up to ibm/gcc-4_9-branch, subversion id 218646. + * REVISION: Update subversion id. + +2014-12-09 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.c (rs6000_secondary_reload): Clear entire + secondary_reload_info structure instead of just setting a few + fields to 0. Add an assertion checking that the secondary reload + function is in range. + +2014-12-05 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.c (rs6000_emit_move): Do not split TFmode + constant moves if -mupper-regs-df. + + * config/rs6000/rs6000.md (mov<mode>_64bit_dm): Optimize moving + 0.0L to TFmode. + (movtd_64bit_nodm): Likewise. + (mov<mode>_32bit, FMOVE128 case): Likewise. + +2014-12-02 Michael Meissner <meissner@linux.vnet.ibm.com> + + Clone branch from at 8.0 branch, subversion id 218285 (FSF + subversion id 217046) + + * REVISION: Update file. + +[gcc, patch #1] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/predicates.md (easy_fp_constant): Delete redundant + tests for 0.0. + +[gcc, patch #2] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/vector.md (VEC_R): Move secondary reload support + insns to rs6000.md from vector.md. + (reload_<VEC_R:mode>_<P:mptrsize>_store): Likewise. + (reload_<VEC_R:mode>_<P:mptrsize>_load): Likewise. + (vec_reload_and_plus_<mptrsize>): Likewise. + + * config/rs6000/rs6000.md (RELOAD): New mode iterator for all of + the types that have secondary reload address support to load up a + base register. 
+ (reload_<RELOAD:mode>_<P:mptrsize>_store): Move the reload + handlers here from vector.md, and expand the types we generate + reload handlers for. + (reload_<RELOAD:mode>_<P:mptrsize>_load): Likewise. + (vec_reload_and_plus_<mptrsize>): Likewise. + +[gcc, patch #3] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/vsx.md (vsx_float<VSi><mode>2): Only provide the + vector forms of the instructions. Move VSX scalar forms to + rs6000.md, and add support for -mupper-regs-sf. + (vsx_floatuns<VSi><mode>2): Likewise. + (vsx_fix_trunc<mode><VSi>2): Likewise. + (vsx_fixuns_trunc<mode><VSi>2): Likewise. + (vsx_float_fix_<mode>2): Delete DF version, rename to + vsx_float_fix_v2df2. + (vsx_float_fix_v2df2): Likewise. + + * config/rs6000/rs6000.md (Fa): New mode attribute to give + constraint for the Altivec registers for a type. + (extendsfdf2_fpr): Use correct constraint. + (copysign<mode>3_fcpsgn): For SFmode, use correct xscpsgndp + instruction. + (floatsi<mode>2_lfiwax): Add support for -mupper-regs-{sf,df}. + Generate the non-VSX instruction if all registers were FPRs. Do + not use the patterns in vsx.md for scalar operations. + (floatsi<mode>2_lfiwax_mem): Likewise. + (floatunssi<mode>2_lfiwzx): Likewise. + (floatunssi<mode>2_lfiwzx_mem): Likewise. + (fix_trunc<mode>di2_fctidz): Likewise. + (fixuns_trunc<mode>di2_fctiduz): Likewise. + (fctiwz_<mode>): Likewise. + (fctiwuz_<mode>): Likewise. + (friz): Likewise. + (floatdidf2_fpr): Likewise. + (floatdidf2_mem): Likewise. + (floatunsdidf2): Likewise. + (floatunsdidf2_fcfidu): Likewise. + (floatunsdidf2_mem): Likewise. + (floatdisf2_fcfids): Likewise. + (floatdisf2_mem): Likewise. + (floatdisf2_internal1): Add explicit test for not FCFIDS to make + it more obvious that the code is for pre-ISA 2.06 machines. + (floatdisf2_internal2): Likewise. + (floatunsdisf2_fcfidus): Add support for -mupper-regs-{sf,df}. + Generate the non-VSX instruction if all registers were FPRs. 
Do + not use the patterns in vsx.md for scalar operations. + (floatunsdisf2_mem): Likewise. + +[gcc, patch #4] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.c (RELOAD_REG_AND_M16): Add support for + Altivec style vector loads that ignore the bottom 3 bits of the + address. + (rs6000_debug_addr_mask): New function to print the addr_mask + values if debugging. + (rs6000_debug_print_mode): Call rs6000_debug_addr_mask to print + out addr_mask. + (rs6000_setup_reg_addr_masks): Add support for Altivec style + vector loads that ignore the bottom 3 bits of the address. + (rs6000_init_hard_regno_mode_ok): Rework DFmode support if + -mupper-regs-df. Add support for -mupper-regs-sf. Rearrange code + placement for direct move support. + (rs6000_option_override_internal): Add checks for -mupper-regs-df + requiring -mvsx, and -mupper-regs-sf requiring -mpower8-vector. + (rs6000_secondary_reload_fail): Add ATTRIBUTE_NORETURN. + +[gcc, patch #5] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + Ulrich Weigand <Ulrich.Weigand@de.ibm.com> + + * config/rs6000/rs6000.c (rs6000_secondary_reload_toc_costs): + Helper function to identify costs of a TOC load for secondary + reload support. + (rs6000_secondary_reload_memory): Helper function for secondary + reload, to determine if a particular memory operation is directly + handled by the hardware, or if it needs support from secondary + reload to create a valid address. + (rs6000_secondary_reload): Rework code, to be clearer. If the + appropriate -mupper-regs-{sf,df} is used, use FPR registers to + reload scalar values, since the FPR registers have D-form + addressing. Move most of the code handling memory to the function + rs6000_secondary_reload_memory, and use the reg_addr structure to + determine what type of address modes are supported. Print more + debug information if -mdebug=addr. + (rs6000_secondary_reload_inner): Rework entire function to be more + general. 
Use the reg_addr bits to determine what type of + addressing is supported. + (rs6000_preferred_reload_class): Rework. Move constant handling + into a single place. Prefer using FLOAT_REGS for scalar floating + point. + (rs6000_secondary_reload_class): Use a FPR register to move a + value from an Altivec register to a GPR, and vice versa. Move VSX + handling above traditional floating point. + + * config/rs6000/rs6000.md (mov<mode>_hardfloat, FMOVE32 case): + Delete some spaces in the constraints. + (DF->DF move peephole2): Disable if -mupper-regs-{sf,df} to + allow using FPR registers to load/store an Altivec register for + scalar floating point types. + (SF->SF move peephole2): Likewise. + +[gcc, patch #6] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/rs6000.opt (-mupper-regs-df): Make option public. + (-mupper-regs-sf): Likewise. + + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define + __UPPER_REGS_DF__ if -mupper-regs-df. Define __UPPER_REGS_SF__ if + -mupper-regs-sf. + + * doc/invoke.texi (RS/6000 and PowerPC Options): Document + -mupper-regs-{sf,df}. + +[gcc, patch #8] +2014-11-14 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/predicates.md (memory_fp_constant): New predicate + to return true if the operand is a floating point constant that + must be put into the constant pool, before register allocation + occurs. + + * config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Enable + -mupper-regs-df by default. + (ISA_2_7_MASKS_SERVER): Enable -mupper-regs-sf by default. + (POWERPC_MASKS): Add -mupper-regs-{sf,df} as options set by the + various -mcpu=... options. + (power7 cpu): Enable -mupper-regs-df by default. + + * config/rs6000/rs6000.opt (-mupper-regs): New combination option + that sets -mupper-regs-sf and -mupper-regs-df by default if the + cpu supports the instructions. 
+ + * config/rs6000/rs6000.c (rs6000_setup_reg_addr_masks): Allow + pre-increment and pre-decrement on floating point, even if the + -mupper-regs-{sf,df} options were used. + (rs6000_option_override_internal): If -mupper-regs, set both + -mupper-regs-sf and -mupper-regs-df, depending on the underlying + cpu. + + * config/rs6000/rs6000.md (DFmode splitter): Add a define_split to + move floating point constants to the constant pool before register + allocation. Normally constants are put into the pool immediately, + but -ffast-math delays putting them into the constant pool for the + reciprocal approximation support. + (SFmode splitter): Likewise. + + * doc/invoke.texi (RS/6000 and PowerPC Options): Document + -mupper-regs. + +[pr63965 fix] +2014-11-20 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/63965 + * config/rs6000/rs6000.c (rs6000_setup_reg_addr_masks): Do not set + Altivec & -16 mask if the type is not valid for Altivec registers. + (rs6000_secondary_reload_memory): Add support for ((reg + const) + + reg) that occurs during push_reload processing. + + * config/rs6000/altivec.md (altivec_mov<mode>): Add instruction + alternative for moving constant vectors which are easy altivec + constants to GPRs. Set the length attribute each of the + alternatives. + +[pr64019 fix] +2014-12-01 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/64019 + * config/rs6000/rs6000.c (rs6000_legitimize_reload_address): Do + not create LO_SUM address for constant addresses if the type can + go in Altivec registers. 
+ diff --git a/gcc/REVISION b/gcc/REVISION index dd537118185..62601b616b3 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -[ibm/gcc-4_9-branch merged from gcc-4_9-branch, revision 220457] +[ibm/gcc-4_9-addr merged from gcc-4_9-branch, revision 220457, merged from at8 branch 220484] diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 02ea1423782..9a2f5d764f4 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -189,8 +189,8 @@ ;; Vector move instructions. (define_insn "*altivec_mov<mode>" - [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v") - (match_operand:VM2 1 "input_operand" "v,Z,v,r,Y,r,j,W"))] + [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v,*r") + (match_operand:VM2 1 "input_operand" "v,Z,v,r,Y,r,j,W,W"))] "VECTOR_MEM_ALTIVEC_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -205,10 +205,12 @@ case 5: return "#"; case 6: return "vxor %0,%0,%0"; case 7: return output_vec_const_move (operands); + case 8: return "#"; default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) + [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*,*") + (set_attr "length" "4,4,4,20,20,20,4,8,32")]) ;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode ;; is for unions. However for plain data movement, slightly favor the vector diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 2f40462159e..cc955c52a62 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -471,10 +471,6 @@ && num_insns_constant_wide ((HOST_WIDE_INT) k[3]) == 1); case DFmode: - /* The constant 0.f is easy under VSX. */ - if (op == CONST0_RTX (DFmode) && VECTOR_UNIT_VSX_P (DFmode)) - return 1; - /* Force constants to memory before reload to utilize compress_float_constant. 
Avoid this when flag_unsafe_math_optimizations is enabled @@ -492,10 +488,6 @@ && num_insns_constant_wide ((HOST_WIDE_INT) k[1]) == 1); case SFmode: - /* The constant 0.f is easy. */ - if (op == CONST0_RTX (SFmode)) - return 1; - /* Force constants to memory before reload to utilize compress_float_constant. Avoid this when flag_unsafe_math_optimizations is enabled @@ -521,6 +513,27 @@ } }) +;; Return 1 if the operand must be loaded from memory. This is used by a +;; define_split to ensure constants get pushed to the constant pool before +;; reload. If -ffast-math is used, easy_fp_constant will allow move insns to +;; have constants in order not to interfere with reciprocal estimation. However, +;; with -mupper-regs support, these constants must be moved to the constant +;; pool before register allocation. + +(define_predicate "memory_fp_constant" + (match_code "const_double") +{ + if (TARGET_VSX && op == CONST0_RTX (mode)) + return 0; + + if (!TARGET_HARD_FLOAT || !TARGET_FPRS + || (mode == SFmode && !TARGET_SINGLE_FLOAT) + || (mode == DFmode && !TARGET_DOUBLE_FLOAT)) + return 0; + + return 1; +}) + ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a ;; vector register without using memory. (define_predicate "easy_vector_constant" diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 8dedeec2643..3c6e45afb08 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -362,6 +362,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__"); if ((flags & OPTION_MASK_CRYPTO) != 0) rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); + if ((flags & OPTION_MASK_UPPER_REGS_DF) != 0) + rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_DF__"); + if ((flags & OPTION_MASK_UPPER_REGS_SF) != 0) + rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_SF__"); /* options from the builtin masks. 
*/ if ((bu_mask & RS6000_BTM_SPE) != 0) diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index ba56df83dd1..17f5a571383 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -44,7 +44,8 @@ #define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_ALTIVEC \ - | OPTION_MASK_VSX) + | OPTION_MASK_VSX \ + | OPTION_MASK_UPPER_REGS_DF) /* For now, don't provide an embedded version of ISA 2.07. */ #define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ @@ -54,7 +55,8 @@ | OPTION_MASK_DIRECT_MOVE \ | OPTION_MASK_HTM \ | OPTION_MASK_QUAD_MEMORY \ - | OPTION_MASK_QUAD_MEMORY_ATOMIC) + | OPTION_MASK_QUAD_MEMORY_ATOMIC \ + | OPTION_MASK_UPPER_REGS_SF) #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) @@ -95,6 +97,8 @@ | OPTION_MASK_RECIP_PRECISION \ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ + | OPTION_MASK_UPPER_REGS_DF \ + | OPTION_MASK_UPPER_REGS_SF \ | OPTION_MASK_VSX \ | OPTION_MASK_VSX_TIMODE) @@ -185,7 +189,7 @@ RS6000_CPU ("power6x", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION) + | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF) RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1917cdc97ab..b5b18411f85 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -383,6 +383,7 @@ typedef unsigned char addr_mask_type; #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. 
*/ #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ +#define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ /* Register type masks based on the type, of valid addressing modes. */ struct rs6000_reg_addr { @@ -1904,6 +1905,54 @@ rs6000_debug_vector_unit (enum rs6000_vector v) return ret; } +/* Inner function printing just the address mask for a particular reload + register class. */ +DEBUG_FUNCTION char * +rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) +{ + static char ret[8]; + char *p = ret; + + if ((mask & RELOAD_REG_VALID) != 0) + *p++ = 'v'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_MULTIPLE) != 0) + *p++ = 'm'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_INDEXED) != 0) + *p++ = 'i'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_OFFSET) != 0) + *p++ = 'o'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_PRE_INCDEC) != 0) + *p++ = '+'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_PRE_MODIFY) != 0) + *p++ = '+'; + else if (keep_spaces) + *p++ = ' '; + + if ((mask & RELOAD_REG_AND_M16) != 0) + *p++ = '&'; + else if (keep_spaces) + *p++ = ' '; + + *p = '\0'; + + return ret; +} + /* Print the address masks in a human readble fashion. */ DEBUG_FUNCTION void rs6000_debug_print_mode (ssize_t m) @@ -1912,18 +1961,8 @@ rs6000_debug_print_mode (ssize_t m) fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); for (rc = 0; rc < N_RELOAD_REG; rc++) - { - addr_mask_type mask = reg_addr[m].addr_mask[rc]; - fprintf (stderr, - " %s: %c%c%c%c%c%c", - reload_reg_map[rc].name, - (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ', - (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ', - (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ', - (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ', - (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ', - (mask & RELOAD_REG_PRE_MODIFY) != 0 ? 
'+' : ' '); - } + fprintf (stderr, " %s: %s", reload_reg_map[rc].name, + rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); if (rs6000_vector_unit[m] != VECTOR_NONE || rs6000_vector_mem[m] != VECTOR_NONE @@ -2399,9 +2438,7 @@ rs6000_setup_reg_addr_masks (void) /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY addressing. Restrict addressing on SPE for 64-bit types because of the SUBREG hackery used to address 64-bit floats in - '32-bit' GPRs. To simplify secondary reload, don't allow - update forms on scalar floating point types that can go in the - upper registers. */ + '32-bit' GPRs. */ if (TARGET_UPDATE && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) @@ -2409,8 +2446,7 @@ rs6000_setup_reg_addr_masks (void) && !VECTOR_MODE_P (m2) && !COMPLEX_MODE_P (m2) && !indexed_only_p - && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8) - && !reg_addr[m2].scalar_in_vmx_p) + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8)) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2443,6 +2479,12 @@ rs6000_setup_reg_addr_masks (void) && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)) addr_mask |= RELOAD_REG_OFFSET; + /* VMX registers can do (REG & -16) and ((REG+REG) & -16) + addressing on 128-bit types. */ + if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16 + && (addr_mask & RELOAD_REG_VALID) != 0) + addr_mask |= RELOAD_REG_AND_M16; + reg_addr[m].addr_mask[rc] = addr_mask; any_addr_mask |= addr_mask; } @@ -2609,13 +2651,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[V1TImode] = 128; } - /* DFmode, see if we want to use the VSX unit. */ + /* DFmode, see if we want to use the VSX unit. Memory is handled + differently, so don't set rs6000_vector_mem. */ if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) { rs6000_vector_unit[DFmode] = VECTOR_VSX; - rs6000_vector_mem[DFmode] - = (TARGET_UPPER_REGS_DF ? 
VECTOR_VSX : VECTOR_NONE); - rs6000_vector_align[DFmode] = align64; + rs6000_vector_align[DFmode] = 64; + } + + /* SFmode, see if we want to use the VSX unit. */ + if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT) + { + rs6000_vector_unit[SFmode] = VECTOR_VSX; + rs6000_vector_align[SFmode] = 32; } /* Allow TImode in VSX register and set the VSX memory macros. */ @@ -2750,58 +2798,42 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; - if (TARGET_VSX && TARGET_UPPER_REGS_DF) - { - reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; - reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; - reg_addr[DFmode].scalar_in_vmx_p = true; - reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; - reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; - } - if (TARGET_P8_VECTOR) - { - reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; - reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; - reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; - reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; - if (TARGET_UPPER_REGS_SF) - reg_addr[SFmode].scalar_in_vmx_p = true; - } + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; + reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; + if (TARGET_VSX_TIMODE) { reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; } + if (TARGET_DIRECT_MOVE) { - 
if (TARGET_POWERPC64) - { - reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; - reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; - reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; - reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; - reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; - reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; - reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; - reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; - reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; - - reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; - reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; - reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; - reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; - reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; - reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; - reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; - reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; - reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; - } - else - { - reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; - reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; - reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; - } + reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; + reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; + reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; + reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; + reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HImode].reload_gpr_vsx 
= CODE_FOR_reload_gpr_from_vsxv8hi; + reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; + reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; + + reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; + reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; + reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; + reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; + reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; + reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; + reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; } } else @@ -2820,29 +2852,34 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; - if (TARGET_VSX && TARGET_UPPER_REGS_DF) - { - reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; - reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; - reg_addr[DFmode].scalar_in_vmx_p = true; - reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; - reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; - } - if (TARGET_P8_VECTOR) - { - reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; - reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; - reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; - reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; - if (TARGET_UPPER_REGS_SF) - reg_addr[SFmode].scalar_in_vmx_p = true; - } + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; + 
reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; + if (TARGET_VSX_TIMODE) { reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; } + + if (TARGET_DIRECT_MOVE) + { + reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; + reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; + reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; + } } + + if (TARGET_UPPER_REGS_DF) + reg_addr[DFmode].scalar_in_vmx_p = true; + + if (TARGET_UPPER_REGS_SF) + reg_addr[SFmode].scalar_in_vmx_p = true; } /* Precalculate HARD_REGNO_NREGS. */ @@ -3446,6 +3483,54 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_DFP; } + /* Allow an explicit -mupper-regs to set both -mupper-regs-df and + -mupper-regs-sf, depending on the cpu, unless the user explicitly also set + the individual option. 
*/ + if (TARGET_UPPER_REGS > 0) + { + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) + { + rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; + } + if (TARGET_P8_VECTOR + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) + { + rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; + } + } + else if (TARGET_UPPER_REGS == 0) + { + if (TARGET_VSX + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) + { + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; + } + if (TARGET_P8_VECTOR + && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) + { + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; + rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; + } + } + + if (TARGET_UPPER_REGS_DF && !TARGET_VSX) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) + error ("-mupper-regs-df requires -mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; + } + + if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR) + { + if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) + error ("-mupper-regs-sf requires -mpower8-vector"); + rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; + } + /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, silently turn off quad memory mode. */ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) @@ -7472,7 +7557,11 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode, naturally aligned. Since we say the address is good here, we can't disable offsets from LO_SUMs in mem_operand_gpr. FIXME: Allow offset from lo_sum for other modes too, when - mem is sufficiently aligned. */ + mem is sufficiently aligned. + + Also disallow this if the type can go in VMX/Altivec registers, since + those registers do not have d-form (reg+offset) address modes. 
*/ + && !reg_addr[mode].scalar_in_vmx_p && mode != TFmode && mode != TDmode && (mode != TImode || !TARGET_VSX_TIMODE) @@ -8304,9 +8393,11 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) || ! nonimmediate_operand (operands[0], mode))) goto emit_set; - /* 128-bit constant floating-point values on Darwin should really be - loaded as two parts. */ + /* 128-bit constant floating-point values on Darwin should really be loaded + as two parts. However, this premature splitting is a problem when DFmode + values can go into Altivec registers. */ if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 + && !reg_addr[DFmode].scalar_in_vmx_p && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE) { rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), @@ -8325,6 +8416,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); + /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), + p1:SD) if p1 is not of floating point class and p0 is spilled as + we can have no analogous movsd_store for this. */ + if (lra_in_progress && mode == DDmode + && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[0])) == NO_REGS + && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1])) + && GET_MODE (SUBREG_REG (operands[1])) == SDmode) + { + enum reg_class cl; + int regno = REGNO (SUBREG_REG (operands[1])); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; + } + if (regno >= 0 && ! 
FP_REGNO_P (regno)) + { + mode = SDmode; + operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); + operands[1] = SUBREG_REG (operands[1]); + } + } if (lra_in_progress && mode == SDmode && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER @@ -8355,6 +8470,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) gcc_unreachable(); return; } + /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD + p:DD)) if p0 is not of floating point class and p1 is spilled as + we can have no analogous movsd_load for this. */ + if (lra_in_progress && mode == DDmode + && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0])) + && GET_MODE (SUBREG_REG (operands[0])) == SDmode + && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER + && reg_preferred_class (REGNO (operands[1])) == NO_REGS) + { + enum reg_class cl; + int regno = REGNO (SUBREG_REG (operands[0])); + + if (regno >= FIRST_PSEUDO_REGISTER) + { + cl = reg_preferred_class (regno); + regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; + } + if (regno >= 0 && ! FP_REGNO_P (regno)) + { + mode = SDmode; + operands[0] = SUBREG_REG (operands[0]); + operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); + } + } if (lra_in_progress && mode == SDmode && (REG_P (operands[0]) @@ -16281,6 +16420,289 @@ register_to_reg_type (rtx reg, bool *is_altivec) return reg_class_to_reg_type[(int)rclass]; } +/* Helper function to return the cost of adding a TOC entry address. */ + +static inline int +rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) +{ + int ret; + + if (TARGET_CMODEL != CMODEL_SMALL) + ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; + + else + ret = (TARGET_MINIMAL_TOC) ? 6 : 3; + + return ret; +} + +/* Helper function for rs6000_secondary_reload to determine whether the memory + address (ADDR) with a given register class (RCLASS) and machine mode (MODE) + needs reloading. 
Return negative if the memory is not handled by the memory + helper functions and to try a different reload method, 0 if no additional + instructions are needed, and positive to give the extra cost for the + memory. */ + +static int +rs6000_secondary_reload_memory (rtx addr, + enum reg_class rclass, + enum machine_mode mode) +{ + int extra_cost = 0; + rtx reg, and_arg, plus_arg0, plus_arg1; + addr_mask_type addr_mask; + const char *type = NULL; + const char *fail_msg = NULL; + + if (GPR_REG_CLASS_P (rclass)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; + + else if (rclass == FLOAT_REGS) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; + + else if (rclass == ALTIVEC_REGS) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; + + /* For the combined VSX_REGS, turn off Altivec AND -16. */ + else if (rclass == VSX_REGS) + addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] + & ~RELOAD_REG_AND_M16); + + else + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, + "rs6000_secondary_reload_memory: mode = %s, class = %s, " + "class is not GPR, FPR, VMX\n", + GET_MODE_NAME (mode), reg_class_names[rclass]); + + return -1; + } + + /* If the register isn't valid in this register class, just return now. */ + if ((addr_mask & RELOAD_REG_VALID) == 0) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, + "rs6000_secondary_reload_memory: mode = %s, class = %s, " + "not valid in class\n", + GET_MODE_NAME (mode), reg_class_names[rclass]); + + return -1; + } + + switch (GET_CODE (addr)) + { + /* Does the register class support auto update forms for this mode? We + don't need a scratch register, since the powerpc only supports + PRE_INC, PRE_DEC, and PRE_MODIFY. 
*/ + case PRE_INC: + case PRE_DEC: + reg = XEXP (addr, 0); + if (!base_reg_operand (addr, GET_MODE (reg))) + { + fail_msg = "no base register #1"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) + { + extra_cost = 1; + type = "update"; + } + break; + + case PRE_MODIFY: + reg = XEXP (addr, 0); + plus_arg1 = XEXP (addr, 1); + if (!base_reg_operand (reg, GET_MODE (reg)) + || GET_CODE (plus_arg1) != PLUS + || !rtx_equal_p (reg, XEXP (plus_arg1, 0))) + { + fail_msg = "bad PRE_MODIFY"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) + { + extra_cost = 1; + type = "update"; + } + break; + + /* Do we need to simulate AND -16 to clear the bottom address bits used + in VMX load/stores? Only allow the AND for vector sizes. */ + case AND: + and_arg = XEXP (addr, 0); + if (GET_MODE_SIZE (mode) != 16 + || GET_CODE (XEXP (addr, 1)) != CONST_INT + || INTVAL (XEXP (addr, 1)) != -16) + { + fail_msg = "bad Altivec AND #1"; + extra_cost = -1; + } + + if (rclass != ALTIVEC_REGS) + { + if (legitimate_indirect_address_p (and_arg, false)) + extra_cost = 1; + + else if (legitimate_indexed_address_p (and_arg, false)) + extra_cost = 2; + + else + { + fail_msg = "bad Altivec AND #2"; + extra_cost = -1; + } + + type = "and"; + } + break; + + /* If this is an indirect address, make sure it is a base register. */ + case REG: + case SUBREG: + if (!legitimate_indirect_address_p (addr, false)) + { + extra_cost = 1; + type = "move"; + } + break; + + /* If this is an indexed address, make sure the register class can handle + indexed addresses for this mode. */ + case PLUS: + plus_arg0 = XEXP (addr, 0); + plus_arg1 = XEXP (addr, 1); + + /* (plus (plus (reg) (constant)) (constant)) is generated during + push_reload processing, so handle it now. 
*/ + if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "offset"; + } + } + + /* (plus (plus (reg) (constant)) (reg)) is also generated during + push_reload processing, so handle it now. */ + else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1)) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0) + { + extra_cost = 1; + type = "indexed #2"; + } + } + + else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0))) + { + fail_msg = "no base register #2"; + extra_cost = -1; + } + + else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1))) + { + if ((addr_mask & RELOAD_REG_INDEXED) == 0 + || !legitimate_indexed_address_p (addr, false)) + { + extra_cost = 1; + type = "indexed"; + } + } + + /* Make sure the register class can handle offset addresses. */ + else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) + { + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "offset"; + } + } + + else + { + fail_msg = "bad PLUS"; + extra_cost = -1; + } + + break; + + case LO_SUM: + if (!legitimate_lo_sum_address_p (mode, addr, false)) + { + fail_msg = "bad LO_SUM"; + extra_cost = -1; + } + + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "lo_sum"; + } + break; + + /* Static addresses need to create a TOC entry. */ + case CONST: + case SYMBOL_REF: + case LABEL_REF: + type = "address"; + extra_cost = rs6000_secondary_reload_toc_costs (addr_mask); + break; + + /* TOC references look like offsetable memory. 
*/ + case UNSPEC: + if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL) + { + fail_msg = "bad UNSPEC"; + extra_cost = -1; + } + + else if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + extra_cost = 1; + type = "toc reference"; + } + break; + + default: + { + fail_msg = "bad address"; + extra_cost = -1; + } + } + + if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */) + { + if (extra_cost < 0) + fprintf (stderr, + "rs6000_secondary_reload_memory error: mode = %s, " + "class = %s, addr_mask = '%s', %s\n", + GET_MODE_NAME (mode), + reg_class_names[rclass], + rs6000_debug_addr_mask (addr_mask, false), + (fail_msg != NULL) ? fail_msg : "<bad address>"); + + else + fprintf (stderr, + "rs6000_secondary_reload_memory: mode = %s, class = %s, " + "addr_mask = '%s', extra cost = %d, %s\n", + GET_MODE_NAME (mode), + reg_class_names[rclass], + rs6000_debug_addr_mask (addr_mask, false), + extra_cost, + (type) ? type : "<none>"); + + debug_rtx (addr); + } + + return extra_cost; +} + /* Helper function for rs6000_secondary_reload to return true if a move to a different register classe is really a simple move. */ @@ -16434,6 +16856,13 @@ rs6000_secondary_reload_move (enum rs6000_reg_type to_type, secondary_reload_info *sri, bool altivec_p) { + /* Make sure sri is setup if passed. */ + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + /* Fall back to load/store reloads if either type is not a register. */ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) return false; @@ -16452,14 +16881,7 @@ rs6000_secondary_reload_move (enum rs6000_reg_type to_type, /* Check whether a simple move can be done directly. */ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) - { - if (sri) - { - sri->icode = CODE_FOR_nothing; - sri->extra_cost = 0; - } - return true; - } + return true; /* Now check if we can do it in a few steps. 
*/ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, @@ -16487,8 +16909,17 @@ rs6000_secondary_reload (bool in_p, reg_class_t ret = ALL_REGS; enum insn_code icode; bool default_p = false; + bool done_p = false; + + /* Allow subreg of memory before/during reload. */ + bool memory_p = (MEM_P (x) + || (!reload_completed && GET_CODE (x) == SUBREG + && MEM_P (SUBREG_REG (x)))); + /* Set the secondary reload structure to a known state. */ + memset ((void *)sri, '\0', sizeof (secondary_reload_info)); sri->icode = CODE_FOR_nothing; + icode = ((in_p) ? reg_addr[mode].reload_load : reg_addr[mode].reload_store); @@ -16512,121 +16943,54 @@ rs6000_secondary_reload (bool in_p, { icode = (enum insn_code)sri->icode; default_p = false; + done_p = true; ret = NO_REGS; } } - /* Handle vector moves with reload helper functions. */ - if (ret == ALL_REGS && icode != CODE_FOR_nothing) + /* Make sure 0.0 is not reloaded or forced into memory. */ + if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) { ret = NO_REGS; - sri->icode = CODE_FOR_nothing; - sri->extra_cost = 0; + default_p = false; + done_p = true; + } - if (GET_CODE (x) == MEM) - { - rtx addr = XEXP (x, 0); + /* If this is a scalar floating point value and we want to load it into the + traditional Altivec registers, do it via a move via a traditional floating + point register. Also make sure that non-zero constants use a FPR. */ + if (!done_p && reg_addr[mode].scalar_in_vmx_p + && (rclass == VSX_REGS || rclass == ALTIVEC_REGS) + && (memory_p || (GET_CODE (x) == CONST_DOUBLE))) + { + ret = FLOAT_REGS; + default_p = false; + done_p = true; + } - /* Loads to and stores from gprs can do reg+offset, and wouldn't need - an extra register in that case, but it would need an extra - register if the addressing is reg+reg or (reg+reg)&(-16). Special - case load/store quad. 
*/ - if (rclass == GENERAL_REGS || rclass == BASE_REGS) - { - if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY - && GET_MODE_SIZE (mode) == 16 - && quad_memory_operand (x, mode)) - { - sri->icode = icode; - sri->extra_cost = 2; - } + /* Handle reload of load/stores if we have reload helper functions. */ + if (!done_p && icode != CODE_FOR_nothing && memory_p) + { + int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass, + mode); - else if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) - { - sri->icode = icode; - /* account for splitting the loads, and converting the - address from reg+reg to reg. */ - sri->extra_cost = (((TARGET_64BIT) ? 3 : 5) - + ((GET_CODE (addr) == AND) ? 1 : 0)); - } - } - /* Allow scalar loads to/from the traditional floating point - registers, even if VSX memory is set. */ - else if ((rclass == FLOAT_REGS || rclass == NO_REGS) - && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) - && (legitimate_indirect_address_p (addr, false) - || legitimate_indirect_address_p (addr, false) - || rs6000_legitimate_offset_address_p (mode, addr, - false, true))) - - ; - /* Loads to and stores from vector registers can only do reg+reg - addressing. Altivec registers can also do (reg+reg)&(-16). Allow - scalar modes loading up the traditional floating point registers - to use offset addresses. */ - else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS - || rclass == FLOAT_REGS || rclass == NO_REGS) - { - if (!VECTOR_MEM_ALTIVEC_P (mode) - && GET_CODE (addr) == AND - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && INTVAL (XEXP (addr, 1)) == -16 - && (legitimate_indirect_address_p (XEXP (addr, 0), false) - || legitimate_indexed_address_p (XEXP (addr, 0), false))) - { - sri->icode = icode; - sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS) - ? 
2 : 1); - } - else if (!legitimate_indirect_address_p (addr, false) - && (rclass == NO_REGS - || !legitimate_indexed_address_p (addr, false))) - { - sri->icode = icode; - sri->extra_cost = 1; - } - else - icode = CODE_FOR_nothing; - } - /* Any other loads, including to pseudo registers which haven't been - assigned to a register yet, default to require a scratch - register. */ - else - { - sri->icode = icode; - sri->extra_cost = 2; - } - } - else if (REG_P (x)) + if (extra_cost >= 0) { - int regno = true_regnum (x); - - icode = CODE_FOR_nothing; - if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) - default_p = true; - else + done_p = true; + ret = NO_REGS; + if (extra_cost > 0) { - enum reg_class xclass = REGNO_REG_CLASS (regno); - enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass]; - enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass]; - - /* If memory is needed, use default_secondary_reload to create the - stack slot. */ - if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1)) - default_p = true; - else - ret = NO_REGS; + sri->extra_cost = extra_cost; + sri->icode = icode; } } - else - default_p = true; } - else if (TARGET_POWERPC64 - && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE - && MEM_P (x) - && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) + + /* Handle unaligned loads and stores of integer registers. 
*/ + if (!done_p && TARGET_POWERPC64 + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE + && memory_p + && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { rtx addr = XEXP (x, 0); rtx off = address_offset (addr); @@ -16658,6 +17022,7 @@ rs6000_secondary_reload (bool in_p, : CODE_FOR_reload_di_store); sri->extra_cost = 2; ret = NO_REGS; + done_p = true; } else default_p = true; @@ -16665,10 +17030,11 @@ rs6000_secondary_reload (bool in_p, else default_p = true; } - else if (!TARGET_POWERPC64 - && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE - && MEM_P (x) - && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + + if (!done_p && !TARGET_POWERPC64 + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE + && memory_p + && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { rtx addr = XEXP (x, 0); rtx off = address_offset (addr); @@ -16704,6 +17070,7 @@ rs6000_secondary_reload (bool in_p, sri->icode = CODE_FOR_reload_si_store; sri->extra_cost = 2; ret = NO_REGS; + done_p = true; } else default_p = true; @@ -16711,13 +17078,28 @@ rs6000_secondary_reload (bool in_p, else default_p = true; } - else + + if (!done_p) default_p = true; if (default_p) - ret = default_secondary_reload (in_p, x, rclass, mode, sri); + { + ret = default_secondary_reload (in_p, x, rclass, mode, sri); + + if (!IN_RANGE (sri->icode, CODE_FOR_nothing, LAST_INSN_CODE)) + { + fprintf (stderr, + "default_secondary_reload failure, ret = %s, rclass = %s, mode = %s, in_p = %s\n", + reg_class_names[ret], + reg_class_names[rclass], + GET_MODE_NAME (mode), + in_p ? 
"true" : "false"); + debug_rtx (x); + } + } gcc_assert (ret != ALL_REGS); + gcc_assert (IN_RANGE (sri->icode, CODE_FOR_nothing, LAST_INSN_CODE)); if (TARGET_DEBUG_ADDR) { @@ -16729,15 +17111,20 @@ rs6000_secondary_reload (bool in_p, reg_class_names[rclass], GET_MODE_NAME (mode)); + if (reload_completed) + fputs (", after reload", stderr); + + if (!done_p) + fputs (", done_p not set", stderr); + if (default_p) - fprintf (stderr, ", default secondary reload"); + fputs (", default secondary reload", stderr); if (sri->icode != CODE_FOR_nothing) - fprintf (stderr, ", reload func = %s, extra cost = %d\n", + fprintf (stderr, ", reload func = %s, extra cost = %d", insn_data[sri->icode].name, sri->extra_cost); - else - fprintf (stderr, "\n"); + fputs ("\n", stderr); debug_rtx (x); } @@ -16766,6 +17153,9 @@ rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); } +static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool) + ATTRIBUTE_NORETURN; + static void rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, bool store_p) @@ -16774,209 +17164,148 @@ rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, gcc_unreachable (); } -/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset - to SP+reg addressing. */ +/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have + reload helper functions. These were identified in + rs6000_secondary_reload_memory, and if reload decided to use the secondary + reload, it calls the insns: + reload_<RELOAD:mode>_<P:mptrsize>_store + reload_<RELOAD:mode>_<P:mptrsize>_load + + which in turn calls this function, to do whatever is necessary to create + valid addresses. 
*/ void rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) { int regno = true_regnum (reg); - enum machine_mode mode = GET_MODE (reg); - enum reg_class rclass; + machine_mode mode = GET_MODE (reg); + addr_mask_type addr_mask; rtx addr; - rtx and_op2 = NULL_RTX; - rtx addr_op1; - rtx addr_op2; - rtx scratch_or_premodify = scratch; - rtx and_rtx; + rtx new_addr; + rtx op_reg, op0, op1; + rtx and_op; rtx cc_clobber; + rtvec rv; - if (TARGET_DEBUG_ADDR) - rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem) + || !base_reg_operand (scratch, GET_MODE (scratch))) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; + + else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; + + else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)) + addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; + + else rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - if (GET_CODE (mem) != MEM) + /* Make sure the mode is valid in this register class. */ + if ((addr_mask & RELOAD_REG_VALID) == 0) rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - rclass = REGNO_REG_CLASS (regno); - addr = find_replacement (&XEXP (mem, 0)); + if (TARGET_DEBUG_ADDR) + rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); - switch (rclass) + new_addr = addr = XEXP (mem, 0); + switch (GET_CODE (addr)) { - /* GPRs can handle reg + small constant, all other addresses need to use - the scratch register. */ - case GENERAL_REGS: - case BASE_REGS: - if (GET_CODE (addr) == AND) + /* Does the register class support auto update forms for this mode? If + not, do the update now. 
We don't need a scratch register, since the + powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */ + case PRE_INC: + case PRE_DEC: + op_reg = XEXP (addr, 0); + if (!base_reg_operand (op_reg, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + + if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) { - and_op2 = XEXP (addr, 1); - addr = find_replacement (&XEXP (addr, 0)); + emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode)))); + new_addr = op_reg; } + break; - if (GET_CODE (addr) == PRE_MODIFY) - { - scratch_or_premodify = find_replacement (&XEXP (addr, 0)); - if (!REG_P (scratch_or_premodify)) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + case PRE_MODIFY: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode) + || GET_CODE (op1) != PLUS + || !rtx_equal_p (op0, XEXP (op1, 0))) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - addr = find_replacement (&XEXP (addr, 1)); - if (GET_CODE (addr) != PLUS) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) + { + emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); + new_addr = reg; } + break; - if (GET_CODE (addr) == PLUS - && (and_op2 != NULL_RTX - || !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true))) + /* Do we need to simulate AND -16 to clear the bottom address bits used + in VMX load/stores? */ + case AND: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if ((addr_mask & RELOAD_REG_AND_M16) == 0) { - /* find_replacement already recurses into both operands of - PLUS so we don't need to call it here. 
*/ - addr_op1 = XEXP (addr, 0); - addr_op2 = XEXP (addr, 1); - if (!legitimate_indirect_address_p (addr_op1, false)) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + if (REG_P (op0) || GET_CODE (op0) == SUBREG) + op_reg = op0; - if (!REG_P (addr_op2) - && (GET_CODE (addr_op2) != CONST_INT - || !satisfies_constraint_I (addr_op2))) + else if (GET_CODE (op1) == PLUS) { - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, - "\nMove plus addr to register %s, mode = %s: ", - rs6000_reg_names[REGNO (scratch)], - GET_MODE_NAME (mode)); - debug_rtx (addr_op2); - } - rs6000_emit_move (scratch, addr_op2, Pmode); - addr_op2 = scratch; + emit_insn (gen_rtx_SET (VOIDmode, scratch, op1)); + op_reg = scratch; } - emit_insn (gen_rtx_SET (VOIDmode, - scratch_or_premodify, - gen_rtx_PLUS (Pmode, - addr_op1, - addr_op2))); + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - addr = scratch_or_premodify; - scratch_or_premodify = scratch; - } - else if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) - { - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, "\nMove addr to register %s, mode = %s: ", - rs6000_reg_names[REGNO (scratch_or_premodify)], - GET_MODE_NAME (mode)); - debug_rtx (addr); - } - rs6000_emit_move (scratch_or_premodify, addr, Pmode); - addr = scratch_or_premodify; - scratch_or_premodify = scratch; + and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1); + cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode)); + rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber); + emit_insn (gen_rtx_PARALLEL (VOIDmode, rv)); + new_addr = scratch; } break; - /* Float registers can do offset+reg addressing for scalar types. 
*/ - case FLOAT_REGS: - if (legitimate_indirect_address_p (addr, false) /* reg */ - || legitimate_indexed_address_p (addr, false) /* reg+reg */ - || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) - && and_op2 == NULL_RTX - && scratch_or_premodify == scratch - && rs6000_legitimate_offset_address_p (mode, addr, false, false))) - break; - - /* If this isn't a legacy floating point load/store, fall through to the - VSX defaults. */ - - /* VSX/Altivec registers can only handle reg+reg addressing. Move other - addresses into a scratch register. */ - case VSX_REGS: - case ALTIVEC_REGS: - - /* With float regs, we need to handle the AND ourselves, since we can't - use the Altivec instruction with an implicit AND -16. Allow scalar - loads to float registers to use reg+offset even if VSX. */ - if (GET_CODE (addr) == AND - && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16 - || GET_CODE (XEXP (addr, 1)) != CONST_INT - || INTVAL (XEXP (addr, 1)) != -16 - || !VECTOR_MEM_ALTIVEC_P (mode))) - { - and_op2 = XEXP (addr, 1); - addr = find_replacement (&XEXP (addr, 0)); - } - - /* If we aren't using a VSX load, save the PRE_MODIFY register and use it - as the address later. */ - if (GET_CODE (addr) == PRE_MODIFY - && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode) - && (rclass != FLOAT_REGS - || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8))) - || and_op2 != NULL_RTX - || !legitimate_indexed_address_p (XEXP (addr, 1), false))) - { - scratch_or_premodify = find_replacement (&XEXP (addr, 0)); - if (!legitimate_indirect_address_p (scratch_or_premodify, false)) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - - addr = find_replacement (&XEXP (addr, 1)); - if (GET_CODE (addr) != PLUS) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); + /* If this is an indirect address, make sure it is a base register. 
*/ + case REG: + case SUBREG: + if (!base_reg_operand (addr, GET_MODE (addr))) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + new_addr = scratch; } + break; - if (legitimate_indirect_address_p (addr, false) /* reg */ - || legitimate_indexed_address_p (addr, false) /* reg+reg */ - || (GET_CODE (addr) == AND /* Altivec memory */ - && rclass == ALTIVEC_REGS - && GET_CODE (XEXP (addr, 1)) == CONST_INT - && INTVAL (XEXP (addr, 1)) == -16 - && (legitimate_indirect_address_p (XEXP (addr, 0), false) - || legitimate_indexed_address_p (XEXP (addr, 0), false)))) - ; + /* If this is an indexed address, make sure the register class can handle + indexed addresses for this mode. */ + case PLUS: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - else if (GET_CODE (addr) == PLUS) + else if (int_reg_operand (op1, Pmode)) { - addr_op1 = XEXP (addr, 0); - addr_op2 = XEXP (addr, 1); - if (!REG_P (addr_op1)) - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - - if (TARGET_DEBUG_ADDR) + if ((addr_mask & RELOAD_REG_INDEXED) == 0) { - fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ", - rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode)); - debug_rtx (addr_op2); + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + new_addr = scratch; } - rs6000_emit_move (scratch, addr_op2, Pmode); - emit_insn (gen_rtx_SET (VOIDmode, - scratch_or_premodify, - gen_rtx_PLUS (Pmode, - addr_op1, - scratch))); - addr = scratch_or_premodify; - scratch_or_premodify = scratch; } - else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST - || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM - || REG_P (addr)) + /* Make sure the register class can handle offset addresses. 
*/ + else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) { - if (TARGET_DEBUG_ADDR) + if ((addr_mask & RELOAD_REG_OFFSET) == 0) { - fprintf (stderr, "\nMove addr to register %s, mode = %s: ", - rs6000_reg_names[REGNO (scratch_or_premodify)], - GET_MODE_NAME (mode)); - debug_rtx (addr); + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + new_addr = scratch; } - - rs6000_emit_move (scratch_or_premodify, addr, Pmode); - addr = scratch_or_premodify; - scratch_or_premodify = scratch; } else @@ -16984,55 +17313,58 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) break; - default: - rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - } - - /* If the original address involved a pre-modify that we couldn't use the VSX - memory instruction with update, and we haven't taken care of already, - store the address in the pre-modify register and use that as the - address. */ - if (scratch_or_premodify != scratch && scratch_or_premodify != addr) - { - emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr)); - addr = scratch_or_premodify; - } + case LO_SUM: + op0 = XEXP (addr, 0); + op1 = XEXP (addr, 1); + if (!base_reg_operand (op0, Pmode)) + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - /* If the original address involved an AND -16 and we couldn't use an ALTIVEC - memory instruction, recreate the AND now, including the clobber which is - generated by the general ANDSI3/ANDDI3 patterns for the - andi. instruction. */ - if (and_op2 != NULL_RTX) - { - if (! legitimate_indirect_address_p (addr, false)) + else if (int_reg_operand (op1, Pmode)) { - emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); - addr = scratch; + if ((addr_mask & RELOAD_REG_INDEXED) == 0) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + new_addr = scratch; + } } - if (TARGET_DEBUG_ADDR) + /* Make sure the register class can handle offset addresses. 
*/ + else if (legitimate_lo_sum_address_p (mode, addr, false)) { - fprintf (stderr, "\nAnd addr to register %s, mode = %s: ", - rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode)); - debug_rtx (and_op2); + if ((addr_mask & RELOAD_REG_OFFSET) == 0) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + new_addr = scratch; + } } - and_rtx = gen_rtx_SET (VOIDmode, - scratch, - gen_rtx_AND (Pmode, - addr, - and_op2)); + else + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); - cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, and_rtx, cc_clobber))); - addr = scratch; + break; + + case SYMBOL_REF: + case CONST: + case LABEL_REF: +#if 0 + if (TARGET_TOC) + emit_insn (gen_rtx_SET (VOIDmode, scratch, + create_TOC_reference (addr, scratch))); + else +#endif + rs6000_emit_move (scratch, addr, Pmode); + + new_addr = scratch; + break; + + default: + rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); } /* Adjust the address if it changed. */ - if (addr != XEXP (mem, 0)) + if (addr != new_addr) { - mem = replace_equiv_address_nv (mem, addr); + mem = replace_equiv_address_nv (mem, new_addr); if (TARGET_DEBUG_ADDR) fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); } @@ -17176,44 +17508,36 @@ rs6000_instantiate_decls (void) static enum reg_class rs6000_preferred_reload_class (rtx x, enum reg_class rclass) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); + bool is_constant = CONSTANT_P (x); - if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) - return rclass; - - if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) - && (rclass == ALTIVEC_REGS || rclass == VSX_REGS) - && easy_vector_constant (x, mode)) - return ALTIVEC_REGS; - - if ((CONSTANT_P (x) || GET_CODE (x) == PLUS)) + /* Do VSX tests before handling traditional floaitng point registers. 
*/ + if (TARGET_VSX && VSX_REG_CLASS_P (rclass)) { - if (reg_class_subset_p (GENERAL_REGS, rclass)) - return GENERAL_REGS; - if (reg_class_subset_p (BASE_REGS, rclass)) - return BASE_REGS; - return NO_REGS; - } + if (is_constant) + { + /* Zero is always allowed in all VSX registers. */ + if (x == CONST0_RTX (mode)) + return rclass; - if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS) - return GENERAL_REGS; + /* If this is a vector constant that can be formed with a few Altivec + instructions, we want altivec registers. */ + if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) + return ALTIVEC_REGS; - /* For VSX, prefer the traditional registers for 64-bit values because we can - use the non-VSX loads. Prefer the Altivec registers if Altivec is - handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we - prefer Altivec loads.. */ - if (rclass == VSX_REGS) - { - if (MEM_P (x) && reg_addr[mode].scalar_in_vmx_p) - { - rtx addr = XEXP (x, 0); - if (rs6000_legitimate_offset_address_p (mode, addr, false, true) - || legitimate_lo_sum_address_p (mode, addr, false)) - return FLOAT_REGS; + /* Force constant to memory. */ + return NO_REGS; } - else if (GET_MODE_SIZE (mode) <= 8 && !reg_addr[mode].scalar_in_vmx_p) + + /* If this is a scalar floating point value, prefer the traditional + floating point registers so that we can use D-form (register+offset) + addressing. */ + if (GET_MODE_SIZE (mode) < 16) return FLOAT_REGS; + /* Prefer the Altivec registers if Altivec is handling the vector + operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec + loads. 
*/ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) || mode == V1TImode) return ALTIVEC_REGS; @@ -17221,6 +17545,18 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) return rclass; } + if (is_constant || GET_CODE (x) == PLUS) + { + if (reg_class_subset_p (GENERAL_REGS, rclass)) + return GENERAL_REGS; + if (reg_class_subset_p (BASE_REGS, rclass)) + return BASE_REGS; + return NO_REGS; + } + + if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS) + return GENERAL_REGS; + return rclass; } @@ -17340,30 +17676,34 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode, else regno = -1; + /* If we have VSX register moves, prefer moving scalar values between + Altivec registers and GPR by going via an FPR (and then via memory) + instead of reloading the secondary memory address for Altivec moves. */ + if (TARGET_VSX + && GET_MODE_SIZE (mode) < 16 + && (((rclass == GENERAL_REGS || rclass == BASE_REGS) + && (regno >= 0 && ALTIVEC_REGNO_P (regno))) + || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS) + && (regno >= 0 && INT_REGNO_P (regno))))) + return FLOAT_REGS; + /* We can place anything into GENERAL_REGS and can put GENERAL_REGS into anything. */ if (rclass == GENERAL_REGS || rclass == BASE_REGS || (regno >= 0 && INT_REGNO_P (regno))) return NO_REGS; + /* Constants, memory, and VSX registers can go into VSX registers (both the + traditional floating point and the altivec registers). */ + if (rclass == VSX_REGS + && (regno == -1 || VSX_REGNO_P (regno))) + return NO_REGS; + /* Constants, memory, and FP registers can go into FP registers. */ if ((regno == -1 || FP_REGNO_P (regno)) && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; - /* Memory, and FP/altivec registers can go into fp/altivec registers under - VSX. 
However, for scalar variables, use the traditional floating point - registers so that we can use offset+register addressing. */ - if (TARGET_VSX - && (regno == -1 || VSX_REGNO_P (regno)) - && VSX_REG_CLASS_P (rclass)) - { - if (GET_MODE_SIZE (mode) < 16) - return FLOAT_REGS; - - return NO_REGS; - } - /* Memory, and AltiVec registers can go into AltiVec registers. */ if ((regno == -1 || ALTIVEC_REGNO_P (regno)) && rclass == ALTIVEC_REGS) @@ -18481,7 +18821,7 @@ print_operand (FILE *file, rtx x, int code) fprintf (file, "0,%s", reg_names[REGNO (tmp)]); else { - if (!GET_CODE (tmp) == PLUS + if (GET_CODE (tmp) != PLUS || !REG_P (XEXP (tmp, 0)) || !REG_P (XEXP (tmp, 1))) { @@ -20000,7 +20340,7 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) shift = gen_reg_rtx (SImode); addr = gen_lowpart (SImode, addr); emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask))); - if (WORDS_BIG_ENDIAN) + if (BYTES_BIG_ENDIAN) shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), shift, 1, OPTAB_LIB_WIDEN); *pshift = shift; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index f77754aa110..fff13f06000 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -370,6 +370,9 @@ ; SF/DF constraint for arithmetic on VSX registers (define_mode_attr Fv [(SF "wy") (DF "ws")]) +; SF/DF constraint for arithmetic on altivec registers +(define_mode_attr Fa [(SF "wu") (DF "wv")]) + ; s/d suffix for things like fp_addsub_s/fp_addsub_d (define_mode_attr Fs [(SF "s") (DF "d")]) @@ -515,6 +518,12 @@ (define_mode_attr idiv_ldiv [(SI "idiv") (DI "ldiv")]) + +;; Reload iterator for creating the function to allocate a base register to +;; supplement addressing modes. +(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI]) + ;; Start with fixed-point load and store insns. Here we put only the more ;; complex forms. Basic data transfer is done later. 
@@ -5264,7 +5273,7 @@ "") (define_insn_and_split "*extendsfdf2_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv") + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wu") (float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" "@ @@ -5387,7 +5396,7 @@ "TARGET_<MODE>_FPR && TARGET_CMPB" "@ fcpsgn %0,%2,%1 - xscpsgn<Fvsx> %x0,%x2,%x1" + xscpsgndp %x0,%x2,%x1" [(set_attr "type" "fp")]) ;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a @@ -5659,9 +5668,9 @@ ; not be needed and also in case the insns are deleted as dead code. (define_insn_and_split "floatsi<mode>2_lfiwax" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") (float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) - (clobber (match_scratch:DI 2 "=d"))] + (clobber (match_scratch:DI 2 "=wj"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX && <SI_CONVERT_FP> && can_create_pseudo_p ()" "#" @@ -5700,7 +5709,7 @@ (set_attr "type" "fpload")]) (define_insn_and_split "floatsi<mode>2_lfiwax_mem" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fa>") (float:SFDF (sign_extend:DI (match_operand:SI 1 "memory_operand" "Z,Z")))) @@ -5734,9 +5743,9 @@ [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx" - [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>") (unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r"))) - (clobber (match_scratch:DI 2 "=d"))] + (clobber (match_scratch:DI 2 "=wj"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX && <SI_CONVERT_FP>" "#" @@ -5775,7 +5784,7 @@ (set_attr "type" "fpload")]) (define_insn_and_split "floatunssi<mode>2_lfiwzx_mem" - [(set (match_operand:SFDF 0 
"gpc_reg_operand" "=d,<rreg2>") + [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fa>") (unsigned_float:SFDF (zero_extend:DI (match_operand:SI 1 "memory_operand" "Z,Z")))) @@ -6088,11 +6097,13 @@ "") (define_insn "*fix_trunc<mode>di2_fctidz" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fa>")))] "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS - && TARGET_FCFID && !VECTOR_UNIT_VSX_P (<MODE>mode)" - "fctidz %0,%1" + && TARGET_FCFID" + "@ + fctidz %0,%1 + xscvdpsxds %x0,%x1" [(set_attr "type" "fp")]) (define_expand "fixuns_trunc<mode>si2" @@ -6158,11 +6169,13 @@ "") (define_insn "*fixuns_trunc<mode>di2_fctiduz" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fa>")))] "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS - && TARGET_FCTIDUZ && !VECTOR_UNIT_VSX_P (<MODE>mode)" - "fctiduz %0,%1" + && TARGET_FCTIDUZ" + "@ + fctiduz %0,%1 + xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) @@ -6170,32 +6183,37 @@ ; because the first makes it clear that operand 0 is not live ; before the instruction. 
(define_insn "fctiwz_<mode>" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") + (unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))] UNSPEC_FCTIWZ))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT" - "fctiwz %0,%1" + "@ + fctiwz %0,%1 + xscvdpsxws %x0,%x1" [(set_attr "type" "fp")]) (define_insn "fctiwuz_<mode>" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi") (unspec:DI [(unsigned_fix:SI - (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))] + (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))] UNSPEC_FCTIWUZ))] "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ" - "fctiwuz %0,%1" + "@ + fctiwuz %0,%1 + xscvdpuxws %x0,%x1" [(set_attr "type" "fp")]) ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since ;; since the friz instruction does not truncate the value if the floating ;; point value is < LONG_MIN or > LONG_MAX. 
(define_insn "*friz" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d"))))] + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,ws"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_FPRND - && !VECTOR_UNIT_VSX_P (DFmode) && flag_unsafe_math_optimizations - && !flag_trapping_math && TARGET_FRIZ" - "friz %0,%1" + && flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ" + "@ + friz %0,%1 + xsrdpiz %x0,%x1" [(set_attr "type" "fp")]) ;; Since FCTIWZ doesn't sign extend the upper bits, we have to do a store and a @@ -6378,11 +6396,12 @@ "") (define_insn "*floatdidf2_fpr" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))] - "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS - && !VECTOR_UNIT_VSX_P (DFmode)" - "fcfid %0,%1" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS" + "@ + fcfid %0,%1 + xscvsxddp %x0,%x1" [(set_attr "type" "fp")]) ; Allow the combiner to merge source memory operands to the conversion so that @@ -6391,9 +6410,9 @@ ; hit. 
We will split after reload to avoid the trip through the GPRs (define_insn_and_split "*floatdidf2_mem" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (float:DF (match_operand:DI 1 "memory_operand" "m"))) - (clobber (match_scratch:DI 2 "=d"))] + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (float:DF (match_operand:DI 1 "memory_operand" "m,Z"))) + (clobber (match_scratch:DI 2 "=d,wi"))] "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCFID" "#" "&& reload_completed" @@ -6407,21 +6426,23 @@ [(set (match_operand:DF 0 "gpc_reg_operand" "") (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "")))] - "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))" + "TARGET_HARD_FLOAT && TARGET_FCFIDU" "") (define_insn "*floatunsdidf2_fcfidu" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))] - "TARGET_HARD_FLOAT && TARGET_FCFIDU && !VECTOR_UNIT_VSX_P (DFmode)" - "fcfidu %0,%1" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] + "TARGET_HARD_FLOAT && TARGET_FCFIDU" + "@ + fcfidu %0,%1 + xscvuxddp %x0,%x1" [(set_attr "type" "fp") (set_attr "length" "4")]) (define_insn_and_split "*floatunsdidf2_mem" - [(set (match_operand:DF 0 "gpc_reg_operand" "=d") - (unsigned_float:DF (match_operand:DI 1 "memory_operand" "m"))) - (clobber (match_scratch:DI 2 "=d"))] + [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws") + (unsigned_float:DF (match_operand:DI 1 "memory_operand" "m,Z"))) + (clobber (match_scratch:DI 2 "=d,wi"))] "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))" "#" "&& reload_completed" @@ -6454,17 +6475,19 @@ }") (define_insn "floatdisf2_fcfids" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy") + (float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] 
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" - "fcfids %0,%1" + "@ + fcfids %0,%1 + xscvsxdsp %x0,%x1" [(set_attr "type" "fp")]) (define_insn_and_split "*floatdisf2_mem" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (float:SF (match_operand:DI 1 "memory_operand" "m"))) - (clobber (match_scratch:DI 2 "=f"))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy") + (float:SF (match_operand:DI 1 "memory_operand" "m,m,Z"))) + (clobber (match_scratch:DI 2 "=d,d,wi"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS" "#" @@ -6486,7 +6509,8 @@ [(set (match_operand:SF 0 "gpc_reg_operand" "=f") (float:SF (match_operand:DI 1 "gpc_reg_operand" "d"))) (clobber (match_scratch:DF 2 "=d"))] - "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" + "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && !TARGET_FCFIDS" "#" "&& reload_completed" [(set (match_dup 2) @@ -6522,7 +6546,8 @@ (label_ref (match_operand:DI 2 "" "")) (pc))) (set (match_dup 0) (match_dup 1))] - "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT" + "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT + && !TARGET_FCFIDS" " { operands[3] = gen_reg_rtx (DImode); @@ -6537,17 +6562,19 @@ "") (define_insn "floatunsdisf2_fcfidus" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wu") + (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" - "fcfidus %0,%1" + "@ + fcfidus %0,%1 + xscvuxdsp %x0,%x1" [(set_attr "type" "fp")]) (define_insn_and_split "*floatunsdisf2_mem" - [(set (match_operand:SF 0 "gpc_reg_operand" "=f") - (unsigned_float:SF (match_operand:DI 1 "memory_operand" "m"))) - 
(clobber (match_scratch:DI 2 "=f"))] + [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy") + (unsigned_float:SF (match_operand:DI 1 "memory_operand" "m,m,Z"))) + (clobber (match_scratch:DI 2 "=d,d,wi"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS" "#" @@ -9207,7 +9234,7 @@ (define_insn "mov<mode>_hardfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,<f32_vsx>,<f32_vsx>,<f32_lr>,<f32_sm>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r, h, 0, G,Fn"))] + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -9611,8 +9638,8 @@ ;; problematical. Don't allow direct move for this case. (define_insn_and_split "*mov<mode>_64bit_dm" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r,r,wm") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r,wm,r"))] + [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm") + (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r,wm,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN) && (gpc_reg_operand (operands[0], <MODE>mode) @@ -9621,11 +9648,11 @@ "&& reload_completed" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "length" "8,8,8,12,12,8,8,8")]) + [(set_attr "length" "8,8,8,8,12,12,8,8,8")]) (define_insn_and_split "*movtd_64bit_nodm" - [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r") - (match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))] + [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") + (match_operand:TD 1 "input_operand" "d,m,d,j,r,jYGHF,r"))] 
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN && (gpc_reg_operand (operands[0], TDmode) || gpc_reg_operand (operands[1], TDmode))" @@ -9633,11 +9660,11 @@ "&& reload_completed" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "length" "8,8,8,12,12,8")]) + [(set_attr "length" "8,8,8,8,12,12,8")]) (define_insn_and_split "*mov<mode>_32bit" - [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r") - (match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))] + [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r") + (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r"))] "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64 && (gpc_reg_operand (operands[0], <MODE>mode) || gpc_reg_operand (operands[1], <MODE>mode))" @@ -9645,7 +9672,7 @@ "&& reload_completed" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; } - [(set_attr "length" "8,8,8,20,20,16")]) + [(set_attr "length" "8,8,8,8,20,20,16")]) (define_insn_and_split "*mov<mode>_softfloat" [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r") @@ -9659,6 +9686,21 @@ { rs6000_split_multireg_move (operands[0], operands[1]); DONE; } [(set_attr "length" "20,20,16")]) +;; If we are using -ffast-math, easy_fp_constant assumes all constants are +;; 'easy' in order to allow for reciprocal estimation. Make sure the constant +;; is in the constant pool before reload occurs. This simplifies accessing +;; scalars in the traditional Altivec registers. 
+ +(define_split + [(set (match_operand:SFDF 0 "register_operand" "") + (match_operand:SFDF 1 "memory_fp_constant" ""))] + "TARGET_<MODE>_FPR && flag_unsafe_math_optimizations + && !reload_in_progress && !reload_completed && !lra_in_progress" + [(set (match_dup 0) (match_dup 2))] +{ + operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[1])); +}) + (define_expand "extenddftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") (float_extend:TF (match_operand:DF 1 "input_operand" "")))] @@ -9964,6 +10006,50 @@ }) +;; Reload patterns for various types using the vector registers. We may need +;; an additional base register to convert the reg+offset addressing to reg+reg +;; for vector registers and reg+reg or (reg+reg)&(-16) addressing to just an +;; index register for gpr registers. +(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_store" + [(parallel [(match_operand:RELOAD 0 "memory_operand" "m") + (match_operand:RELOAD 1 "gpc_reg_operand" "wa") + (match_operand:P 2 "register_operand" "=b")])] + "<P:tptrsize>" +{ + rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true); + DONE; +}) + +(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_load" + [(parallel [(match_operand:RELOAD 0 "gpc_reg_operand" "wa") + (match_operand:RELOAD 1 "memory_operand" "m") + (match_operand:P 2 "register_operand" "=b")])] + "<P:tptrsize>" +{ + rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false); + DONE; +}) + + +;; Reload sometimes tries to move the address to a GPR, and can generate +;; invalid RTL for addresses involving AND -16. Allow addresses involving +;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16. 
+ +(define_insn_and_split "*vec_reload_and_plus_<mptrsize>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b") + (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "reg_or_cint_operand" "rI")) + (const_int -16)))] + "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(set (match_dup 0) + (plus:P (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (and:P (match_dup 0) + (const_int -16)))]) + ;; Power8 merge instructions to allow direct move to/from floating point ;; registers in 32-bit mode. We use TF mode to get two registers to move the ;; individual 32-bit parts across. Subreg doesn't work too well on the TF @@ -11278,12 +11364,15 @@ ;; sequences, using get_attr_length here will smash the operands ;; array. Neither is there an early_cobbler_p predicate. ;; Disallow subregs for E500 so we don't munge frob_di_df_2. +;; Also this optimization interferes with scalars going into +;; altivec registers (the code does reloading through the FPRs). (define_peephole2 [(set (match_operand:DF 0 "gpc_reg_operand" "") (match_operand:DF 1 "any_operand" "")) (set (match_operand:DF 2 "gpc_reg_operand" "") (match_dup 0))] "!(TARGET_E500_DOUBLE && GET_CODE (operands[2]) == SUBREG) + && !TARGET_UPPER_REGS_DF && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (match_dup 1))]) @@ -11292,7 +11381,8 @@ (match_operand:SF 1 "any_operand" "")) (set (match_operand:SF 2 "gpc_reg_operand" "") (match_dup 0))] - "peep2_reg_dead_p (2, operands[0])" + "!TARGET_UPPER_REGS_SF + && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (match_dup 1))]) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 4d0d5e73db1..eb3e3237935 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -582,12 +582,16 @@ Target Report Var(rs6000_compat_align_parm) Init(0) Save Generate aggregate parameter passing code with at most 64-bit alignment. 
mupper-regs-df -Target Undocumented Mask(UPPER_REGS_DF) Var(rs6000_isa_flags) +Target Report Mask(UPPER_REGS_DF) Var(rs6000_isa_flags) Allow double variables in upper registers with -mcpu=power7 or -mvsx mupper-regs-sf -Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) -Allow float variables in upper registers with -mcpu=power8 or -mp8-vector +Target Report Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) +Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector + +mupper-regs +Target Report Var(TARGET_UPPER_REGS) Init(-1) Save +Allow float/double variables in upper registers if cpu allows it moptimize-swaps Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index edbb83161d1..e0bf6c4f5e4 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -53,10 +53,6 @@ ;; Vector modes for 64-bit base types (define_mode_iterator VEC_64 [V2DI V2DF]) -;; Vector reload iterator -(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI]) - ;; Base type from vector mode (define_mode_attr VEC_base [(V16QI "QI") (V8HI "HI") @@ -183,66 +179,6 @@ } }") - - -;; Reload patterns for vector operations. We may need an additional base -;; register to convert the reg+offset addressing to reg+reg for vector -;; registers and reg+reg or (reg+reg)&(-16) addressing to just an index -;; register for gpr registers. 
-(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_store" - [(parallel [(match_operand:VEC_R 0 "memory_operand" "m") - (match_operand:VEC_R 1 "gpc_reg_operand" "r") - (match_operand:P 2 "register_operand" "=&b")])] - "<P:tptrsize>" -{ - rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true); - DONE; -}) - -(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_load" - [(parallel [(match_operand:VEC_R 0 "gpc_reg_operand" "=&r") - (match_operand:VEC_R 1 "memory_operand" "m") - (match_operand:P 2 "register_operand" "=&b")])] - "<P:tptrsize>" -{ - rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false); - DONE; -}) - -;; Reload sometimes tries to move the address to a GPR, and can generate -;; invalid RTL for addresses involving AND -16. Allow addresses involving -;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16. - -(define_insn_and_split "*vec_reload_and_plus_<mptrsize>" - [(set (match_operand:P 0 "gpc_reg_operand" "=b") - (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") - (match_operand:P 2 "reg_or_cint_operand" "rI")) - (const_int -16)))] - "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" - "#" - "&& reload_completed" - [(set (match_dup 0) - (plus:P (match_dup 1) - (match_dup 2))) - (parallel [(set (match_dup 0) - (and:P (match_dup 0) - (const_int -16))) - (clobber:CC (scratch:CC))])]) - -;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16 -;; address to a register because there is no clobber of a (scratch), so we add -;; it here. 
-(define_insn_and_split "*vec_reload_and_reg_<mptrsize>" - [(set (match_operand:P 0 "gpc_reg_operand" "=b") - (and:P (match_operand:P 1 "gpc_reg_operand" "r") - (const_int -16)))] - "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)" - "#" - "&& reload_completed" - [(parallel [(set (match_dup 0) - (and:P (match_dup 1) - (const_int -16))) - (clobber:CC (scratch:CC))])]) ;; Generic floating point vector arithmetic support (define_expand "add<mode>3" diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 9aaf0642804..ae7db4012b4 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1197,24 +1197,24 @@ ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md. (define_insn "vsx_float<VSi><mode>2" - [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>") - (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] + [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") + (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>cvsx<VSc><VSs> %x0,%x1" + "xvcvsx<VSc><VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_floatuns<VSi><mode>2" - [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>") - (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] + [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") + (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" - "x<VSv>cvux<VSc><VSs> %x0,%x1" + "xvcvux<VSc><VSs> %x0,%x1" [(set_attr "type" "<VStype_simple>") (set_attr "fp_type" "<VSfptype_simple>")]) (define_insn "vsx_fix_trunc<mode><VSi>2" [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") - (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" 
"<VSr>,<VSa>")))] + (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "x<VSv>cv<VSs>sx<VSc>s %x0,%x1" [(set_attr "type" "<VStype_simple>") @@ -1222,7 +1222,7 @@ (define_insn "vsx_fixuns_trunc<mode><VSi>2" [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") - (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))] + (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] "VECTOR_UNIT_VSX_P (<MODE>mode)" "x<VSv>cv<VSs>ux<VSc>s %x0,%x1" [(set_attr "type" "<VStype_simple>") @@ -1525,19 +1525,19 @@ [(set_attr "type" "vecdouble")]) ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since -;; since the xsrdpiz instruction does not truncate the value if the floating +;; since the xvrdpiz instruction does not truncate the value if the floating ;; point value is < LONG_MIN or > LONG_MAX. -(define_insn "*vsx_float_fix_<mode>2" - [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?<VSa>") - (float:VSX_DF - (fix:<VSI> - (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?<VSa>"))))] +(define_insn "*vsx_float_fix_v2df2" + [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") + (float:V2DF + (fix:V2DI + (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT - && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations + && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ" - "x<VSv>r<VSs>iz %x0,%x1" - [(set_attr "type" "<VStype_simple>") - (set_attr "fp_type" "<VSfptype_simple>")]) + "xvrdpiz %x0,%x1" + [(set_attr "type" "vecdouble") + (set_attr "fp_type" "fp_addsub_d")]) ;; Permute operations diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7bb83ede7e5..871a4175446 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -923,7 +923,9 @@ See RS/6000 and PowerPC Options. 
-mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol -mquad-memory -mno-quad-memory @gol -mquad-memory-atomic -mno-quad-memory-atomic @gol --mcompat-align-parm -mno-compat-align-parm} +-mcompat-align-parm -mno-compat-align-parm @gol +-mupper-regs-df -mno-upper-regs-df -mupper-regs-sf -mno-upper-regs-sf @gol +-mupper-regs -mno-upper-regs} @emph{RX Options} @gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol @@ -19169,6 +19171,39 @@ Generate code that uses (does not use) the atomic quad word memory instructions. The @option{-mquad-memory-atomic} option requires use of 64-bit mode. +@item -mupper-regs-df +@itemx -mno-upper-regs-df +@opindex mupper-regs-df +@opindex mno-upper-regs-df +Generate code that uses (does not use) the scalar double precision +instructions that target all 64 registers in the vector/scalar +floating point register set that were added in version 2.06 of the +PowerPC ISA. The @option{-mupper-regs-df} option is turned on by default if you +use any of the @option{-mcpu=power7}, @option{-mcpu=power8}, or +@option{-mvsx} options. + +@item -mupper-regs-sf +@itemx -mno-upper-regs-sf +@opindex mupper-regs-sf +@opindex mno-upper-regs-sf +Generate code that uses (does not use) the scalar single precision +instructions that target all 64 registers in the vector/scalar +floating point register set that were added in version 2.07 of the +PowerPC ISA. The @option{-mupper-regs-sf} option is turned on by default if you +use either of the @option{-mcpu=power8} or @option{-mpower8-vector} +options. + +@item -mupper-regs +@itemx -mno-upper-regs +@opindex mupper-regs +@opindex mno-upper-regs +Generate code that uses (does not use) the scalar +instructions that target all 64 registers in the vector/scalar +floating point register set, depending on the model of the machine. + +If the @option{-mno-upper-regs} option is used, it turns off both the +@option{-mupper-regs-sf} and @option{-mupper-regs-df} options.
+ @item -mfloat-gprs=@var{yes/single/double/no} @itemx -mfloat-gprs @opindex mfloat-gprs diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner new file mode 100644 index 00000000000..a79d0da9920 --- /dev/null +++ b/gcc/testsuite/ChangeLog.meissner @@ -0,0 +1,46 @@ +2015-02-06 Michael Meissner <meissner@linux.vnet.ibm.com> + + Merge up to ibm/gcc-4_9-branch, subversion id 220484. + +2014-12-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + Merge up to ibm/gcc-4_9-branch, subversion id 218646. + +2014-12-02 Michael Meissner <meissner@linux.vnet.ibm.com> + + Clone branch from at 8.0 branch, subversion id 218285 (FSF + subversion id 217046) + +[gcc/testsuite, patch #3] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/ppc-fpconv-1.c: Adjust for -mupper-regs-df + changes. + * gcc.target/powerpc/ppc-fpconv-2.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-3.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-4.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-5.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-6.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-7.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-8.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-9.c: Likewise. + * gcc.target/powerpc/ppc-fpconv-10.c: Likewise. + * gcc.target/powerpc/ppc-round.c: Likewise. + +[gcc/testsuite, patch #7] +2014-11-11 Michael Meissner <meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/p8vector-ldst.c: Rewrite to use 40 live + floating point variables instead of using asm to test allocating + values to the Altivec registers. + + * gcc.target/powerpc/upper-regs-sf.c: New -mupper-regs-sf and + -mupper-regs-df tests. + * gcc.target/powerpc/upper-regs-df.c: Likewise. + +[gcc/testsuite, pr64019 fix] +2014-12-01 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/64019 + * gcc.target/powerpc/pr64019.c: New file. 
+ diff --git a/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c b/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c index 33f19991f76..5da7388097b 100644 --- a/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c +++ b/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c @@ -1,42 +1,624 @@ -/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ /* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */ -float load_sf (float *p) +float +load_store_sf (unsigned long num, + const float *from_ptr, + float *to_ptr, + const unsigned long *in_mask_ptr, + const unsigned long *out_mask_ptr) { - float f = *p; - __asm__ ("# reg %x0" : "+v" (f)); - return f; -} + float value00 = 0.0f; + float value01 = 0.0f; + float value02 = 0.0f; + float value03 = 0.0f; + float value04 = 0.0f; + float value05 = 0.0f; + float value06 = 0.0f; + float value07 = 0.0f; + float value08 = 0.0f; + float value09 = 0.0f; + float value10 = 0.0f; + float value11 = 0.0f; + float value12 = 0.0f; + float value13 = 0.0f; + float value14 = 0.0f; + float value15 = 0.0f; + float value16 = 0.0f; + float value17 = 0.0f; + float value18 = 0.0f; + float value19 = 0.0f; + float value20 = 0.0f; + float value21 = 0.0f; + float value22 = 0.0f; + float value23 = 0.0f; + float value24 = 0.0f; + float value25 = 0.0f; + float value26 = 0.0f; + float value27 = 0.0f; + float value28 = 0.0f; + float value29 = 0.0f; + float value30 = 0.0f; + float value31 = 0.0f; + float value32 = 0.0f; + float value33 = 0.0f; + float value34 = 0.0f; + float value35 = 0.0f; + float value36 = 0.0f; + float value37 = 0.0f; + float value38 = 0.0f; + float value39 = 0.0f; + unsigned long in_mask; + unsigned long out_mask; + unsigned long i; -double load_df (double *p) -{ - 
double d = *p; - __asm__ ("# reg %x0" : "+v" (d)); - return d; -} + for (i = 0; i < num; i++) + { + in_mask = *in_mask_ptr++; + if ((in_mask & (1L << 0)) != 0L) + value00 = *from_ptr++; -double load_dfsf (float *p) -{ - double d = (double) *p; - __asm__ ("# reg %x0" : "+v" (d)); - return d; -} + if ((in_mask & (1L << 1)) != 0L) + value01 = *from_ptr++; -void store_sf (float *p, float f) -{ - __asm__ ("# reg %x0" : "+v" (f)); - *p = f; + if ((in_mask & (1L << 2)) != 0L) + value02 = *from_ptr++; + + if ((in_mask & (1L << 3)) != 0L) + value03 = *from_ptr++; + + if ((in_mask & (1L << 4)) != 0L) + value04 = *from_ptr++; + + if ((in_mask & (1L << 5)) != 0L) + value05 = *from_ptr++; + + if ((in_mask & (1L << 6)) != 0L) + value06 = *from_ptr++; + + if ((in_mask & (1L << 7)) != 0L) + value07 = *from_ptr++; + + if ((in_mask & (1L << 8)) != 0L) + value08 = *from_ptr++; + + if ((in_mask & (1L << 9)) != 0L) + value09 = *from_ptr++; + + if ((in_mask & (1L << 10)) != 0L) + value10 = *from_ptr++; + + if ((in_mask & (1L << 11)) != 0L) + value11 = *from_ptr++; + + if ((in_mask & (1L << 12)) != 0L) + value12 = *from_ptr++; + + if ((in_mask & (1L << 13)) != 0L) + value13 = *from_ptr++; + + if ((in_mask & (1L << 14)) != 0L) + value14 = *from_ptr++; + + if ((in_mask & (1L << 15)) != 0L) + value15 = *from_ptr++; + + if ((in_mask & (1L << 16)) != 0L) + value16 = *from_ptr++; + + if ((in_mask & (1L << 17)) != 0L) + value17 = *from_ptr++; + + if ((in_mask & (1L << 18)) != 0L) + value18 = *from_ptr++; + + if ((in_mask & (1L << 19)) != 0L) + value19 = *from_ptr++; + + if ((in_mask & (1L << 20)) != 0L) + value20 = *from_ptr++; + + if ((in_mask & (1L << 21)) != 0L) + value21 = *from_ptr++; + + if ((in_mask & (1L << 22)) != 0L) + value22 = *from_ptr++; + + if ((in_mask & (1L << 23)) != 0L) + value23 = *from_ptr++; + + if ((in_mask & (1L << 24)) != 0L) + value24 = *from_ptr++; + + if ((in_mask & (1L << 25)) != 0L) + value25 = *from_ptr++; + + if ((in_mask & (1L << 26)) != 0L) + value26 = 
*from_ptr++; + + if ((in_mask & (1L << 27)) != 0L) + value27 = *from_ptr++; + + if ((in_mask & (1L << 28)) != 0L) + value28 = *from_ptr++; + + if ((in_mask & (1L << 29)) != 0L) + value29 = *from_ptr++; + + if ((in_mask & (1L << 30)) != 0L) + value30 = *from_ptr++; + + if ((in_mask & (1L << 31)) != 0L) + value31 = *from_ptr++; + + if ((in_mask & (1L << 32)) != 0L) + value32 = *from_ptr++; + + if ((in_mask & (1L << 33)) != 0L) + value33 = *from_ptr++; + + if ((in_mask & (1L << 34)) != 0L) + value34 = *from_ptr++; + + if ((in_mask & (1L << 35)) != 0L) + value35 = *from_ptr++; + + if ((in_mask & (1L << 36)) != 0L) + value36 = *from_ptr++; + + if ((in_mask & (1L << 37)) != 0L) + value37 = *from_ptr++; + + if ((in_mask & (1L << 38)) != 0L) + value38 = *from_ptr++; + + if ((in_mask & (1L << 39)) != 0L) + value39 = *from_ptr++; + + out_mask = *out_mask_ptr++; + if ((out_mask & (1L << 0)) != 0L) + *to_ptr++ = value00; + + if ((out_mask & (1L << 1)) != 0L) + *to_ptr++ = value01; + + if ((out_mask & (1L << 2)) != 0L) + *to_ptr++ = value02; + + if ((out_mask & (1L << 3)) != 0L) + *to_ptr++ = value03; + + if ((out_mask & (1L << 4)) != 0L) + *to_ptr++ = value04; + + if ((out_mask & (1L << 5)) != 0L) + *to_ptr++ = value05; + + if ((out_mask & (1L << 6)) != 0L) + *to_ptr++ = value06; + + if ((out_mask & (1L << 7)) != 0L) + *to_ptr++ = value07; + + if ((out_mask & (1L << 8)) != 0L) + *to_ptr++ = value08; + + if ((out_mask & (1L << 9)) != 0L) + *to_ptr++ = value09; + + if ((out_mask & (1L << 10)) != 0L) + *to_ptr++ = value10; + + if ((out_mask & (1L << 11)) != 0L) + *to_ptr++ = value11; + + if ((out_mask & (1L << 12)) != 0L) + *to_ptr++ = value12; + + if ((out_mask & (1L << 13)) != 0L) + *to_ptr++ = value13; + + if ((out_mask & (1L << 14)) != 0L) + *to_ptr++ = value14; + + if ((out_mask & (1L << 15)) != 0L) + *to_ptr++ = value15; + + if ((out_mask & (1L << 16)) != 0L) + *to_ptr++ = value16; + + if ((out_mask & (1L << 17)) != 0L) + *to_ptr++ = value17; + + if ((out_mask & (1L << 18)) 
!= 0L) + *to_ptr++ = value18; + + if ((out_mask & (1L << 19)) != 0L) + *to_ptr++ = value19; + + if ((out_mask & (1L << 20)) != 0L) + *to_ptr++ = value20; + + if ((out_mask & (1L << 21)) != 0L) + *to_ptr++ = value21; + + if ((out_mask & (1L << 22)) != 0L) + *to_ptr++ = value22; + + if ((out_mask & (1L << 23)) != 0L) + *to_ptr++ = value23; + + if ((out_mask & (1L << 24)) != 0L) + *to_ptr++ = value24; + + if ((out_mask & (1L << 25)) != 0L) + *to_ptr++ = value25; + + if ((out_mask & (1L << 26)) != 0L) + *to_ptr++ = value26; + + if ((out_mask & (1L << 27)) != 0L) + *to_ptr++ = value27; + + if ((out_mask & (1L << 28)) != 0L) + *to_ptr++ = value28; + + if ((out_mask & (1L << 29)) != 0L) + *to_ptr++ = value29; + + if ((out_mask & (1L << 30)) != 0L) + *to_ptr++ = value30; + + if ((out_mask & (1L << 31)) != 0L) + *to_ptr++ = value31; + + if ((out_mask & (1L << 32)) != 0L) + *to_ptr++ = value32; + + if ((out_mask & (1L << 33)) != 0L) + *to_ptr++ = value33; + + if ((out_mask & (1L << 34)) != 0L) + *to_ptr++ = value34; + + if ((out_mask & (1L << 35)) != 0L) + *to_ptr++ = value35; + + if ((out_mask & (1L << 36)) != 0L) + *to_ptr++ = value36; + + if ((out_mask & (1L << 37)) != 0L) + *to_ptr++ = value37; + + if ((out_mask & (1L << 38)) != 0L) + *to_ptr++ = value38; + + if ((out_mask & (1L << 39)) != 0L) + *to_ptr++ = value39; + } + + return ( value00 + value01 + value02 + value03 + value04 + + value05 + value06 + value07 + value08 + value09 + + value10 + value11 + value12 + value13 + value14 + + value15 + value16 + value17 + value18 + value19 + + value20 + value21 + value22 + value23 + value24 + + value25 + value26 + value27 + value28 + value29 + + value30 + value31 + value32 + value33 + value34 + + value35 + value36 + value37 + value38 + value39); } -void store_df (double *p, double d) +double +load_store_df (unsigned long num, + const double *from_ptr, + double *to_ptr, + const unsigned long *in_mask_ptr, + const unsigned long *out_mask_ptr) { - __asm__ ("# reg %x0" : "+v" (d)); 
- *p = d; + double value00 = 0.0; + double value01 = 0.0; + double value02 = 0.0; + double value03 = 0.0; + double value04 = 0.0; + double value05 = 0.0; + double value06 = 0.0; + double value07 = 0.0; + double value08 = 0.0; + double value09 = 0.0; + double value10 = 0.0; + double value11 = 0.0; + double value12 = 0.0; + double value13 = 0.0; + double value14 = 0.0; + double value15 = 0.0; + double value16 = 0.0; + double value17 = 0.0; + double value18 = 0.0; + double value19 = 0.0; + double value20 = 0.0; + double value21 = 0.0; + double value22 = 0.0; + double value23 = 0.0; + double value24 = 0.0; + double value25 = 0.0; + double value26 = 0.0; + double value27 = 0.0; + double value28 = 0.0; + double value29 = 0.0; + double value30 = 0.0; + double value31 = 0.0; + double value32 = 0.0; + double value33 = 0.0; + double value34 = 0.0; + double value35 = 0.0; + double value36 = 0.0; + double value37 = 0.0; + double value38 = 0.0; + double value39 = 0.0; + unsigned long in_mask; + unsigned long out_mask; + unsigned long i; + + for (i = 0; i < num; i++) + { + in_mask = *in_mask_ptr++; + if ((in_mask & (1L << 0)) != 0L) + value00 = *from_ptr++; + + if ((in_mask & (1L << 1)) != 0L) + value01 = *from_ptr++; + + if ((in_mask & (1L << 2)) != 0L) + value02 = *from_ptr++; + + if ((in_mask & (1L << 3)) != 0L) + value03 = *from_ptr++; + + if ((in_mask & (1L << 4)) != 0L) + value04 = *from_ptr++; + + if ((in_mask & (1L << 5)) != 0L) + value05 = *from_ptr++; + + if ((in_mask & (1L << 6)) != 0L) + value06 = *from_ptr++; + + if ((in_mask & (1L << 7)) != 0L) + value07 = *from_ptr++; + + if ((in_mask & (1L << 8)) != 0L) + value08 = *from_ptr++; + + if ((in_mask & (1L << 9)) != 0L) + value09 = *from_ptr++; + + if ((in_mask & (1L << 10)) != 0L) + value10 = *from_ptr++; + + if ((in_mask & (1L << 11)) != 0L) + value11 = *from_ptr++; + + if ((in_mask & (1L << 12)) != 0L) + value12 = *from_ptr++; + + if ((in_mask & (1L << 13)) != 0L) + value13 = *from_ptr++; + + if ((in_mask & (1L << 
14)) != 0L) + value14 = *from_ptr++; + + if ((in_mask & (1L << 15)) != 0L) + value15 = *from_ptr++; + + if ((in_mask & (1L << 16)) != 0L) + value16 = *from_ptr++; + + if ((in_mask & (1L << 17)) != 0L) + value17 = *from_ptr++; + + if ((in_mask & (1L << 18)) != 0L) + value18 = *from_ptr++; + + if ((in_mask & (1L << 19)) != 0L) + value19 = *from_ptr++; + + if ((in_mask & (1L << 20)) != 0L) + value20 = *from_ptr++; + + if ((in_mask & (1L << 21)) != 0L) + value21 = *from_ptr++; + + if ((in_mask & (1L << 22)) != 0L) + value22 = *from_ptr++; + + if ((in_mask & (1L << 23)) != 0L) + value23 = *from_ptr++; + + if ((in_mask & (1L << 24)) != 0L) + value24 = *from_ptr++; + + if ((in_mask & (1L << 25)) != 0L) + value25 = *from_ptr++; + + if ((in_mask & (1L << 26)) != 0L) + value26 = *from_ptr++; + + if ((in_mask & (1L << 27)) != 0L) + value27 = *from_ptr++; + + if ((in_mask & (1L << 28)) != 0L) + value28 = *from_ptr++; + + if ((in_mask & (1L << 29)) != 0L) + value29 = *from_ptr++; + + if ((in_mask & (1L << 30)) != 0L) + value30 = *from_ptr++; + + if ((in_mask & (1L << 31)) != 0L) + value31 = *from_ptr++; + + if ((in_mask & (1L << 32)) != 0L) + value32 = *from_ptr++; + + if ((in_mask & (1L << 33)) != 0L) + value33 = *from_ptr++; + + if ((in_mask & (1L << 34)) != 0L) + value34 = *from_ptr++; + + if ((in_mask & (1L << 35)) != 0L) + value35 = *from_ptr++; + + if ((in_mask & (1L << 36)) != 0L) + value36 = *from_ptr++; + + if ((in_mask & (1L << 37)) != 0L) + value37 = *from_ptr++; + + if ((in_mask & (1L << 38)) != 0L) + value38 = *from_ptr++; + + if ((in_mask & (1L << 39)) != 0L) + value39 = *from_ptr++; + + out_mask = *out_mask_ptr++; + if ((out_mask & (1L << 0)) != 0L) + *to_ptr++ = value00; + + if ((out_mask & (1L << 1)) != 0L) + *to_ptr++ = value01; + + if ((out_mask & (1L << 2)) != 0L) + *to_ptr++ = value02; + + if ((out_mask & (1L << 3)) != 0L) + *to_ptr++ = value03; + + if ((out_mask & (1L << 4)) != 0L) + *to_ptr++ = value04; + + if ((out_mask & (1L << 5)) != 0L) + *to_ptr++ = 
value05; + + if ((out_mask & (1L << 6)) != 0L) + *to_ptr++ = value06; + + if ((out_mask & (1L << 7)) != 0L) + *to_ptr++ = value07; + + if ((out_mask & (1L << 8)) != 0L) + *to_ptr++ = value08; + + if ((out_mask & (1L << 9)) != 0L) + *to_ptr++ = value09; + + if ((out_mask & (1L << 10)) != 0L) + *to_ptr++ = value10; + + if ((out_mask & (1L << 11)) != 0L) + *to_ptr++ = value11; + + if ((out_mask & (1L << 12)) != 0L) + *to_ptr++ = value12; + + if ((out_mask & (1L << 13)) != 0L) + *to_ptr++ = value13; + + if ((out_mask & (1L << 14)) != 0L) + *to_ptr++ = value14; + + if ((out_mask & (1L << 15)) != 0L) + *to_ptr++ = value15; + + if ((out_mask & (1L << 16)) != 0L) + *to_ptr++ = value16; + + if ((out_mask & (1L << 17)) != 0L) + *to_ptr++ = value17; + + if ((out_mask & (1L << 18)) != 0L) + *to_ptr++ = value18; + + if ((out_mask & (1L << 19)) != 0L) + *to_ptr++ = value19; + + if ((out_mask & (1L << 20)) != 0L) + *to_ptr++ = value20; + + if ((out_mask & (1L << 21)) != 0L) + *to_ptr++ = value21; + + if ((out_mask & (1L << 22)) != 0L) + *to_ptr++ = value22; + + if ((out_mask & (1L << 23)) != 0L) + *to_ptr++ = value23; + + if ((out_mask & (1L << 24)) != 0L) + *to_ptr++ = value24; + + if ((out_mask & (1L << 25)) != 0L) + *to_ptr++ = value25; + + if ((out_mask & (1L << 26)) != 0L) + *to_ptr++ = value26; + + if ((out_mask & (1L << 27)) != 0L) + *to_ptr++ = value27; + + if ((out_mask & (1L << 28)) != 0L) + *to_ptr++ = value28; + + if ((out_mask & (1L << 29)) != 0L) + *to_ptr++ = value29; + + if ((out_mask & (1L << 30)) != 0L) + *to_ptr++ = value30; + + if ((out_mask & (1L << 31)) != 0L) + *to_ptr++ = value31; + + if ((out_mask & (1L << 32)) != 0L) + *to_ptr++ = value32; + + if ((out_mask & (1L << 33)) != 0L) + *to_ptr++ = value33; + + if ((out_mask & (1L << 34)) != 0L) + *to_ptr++ = value34; + + if ((out_mask & (1L << 35)) != 0L) + *to_ptr++ = value35; + + if ((out_mask & (1L << 36)) != 0L) + *to_ptr++ = value36; + + if ((out_mask & (1L << 37)) != 0L) + *to_ptr++ = value37; + + if 
((out_mask & (1L << 38)) != 0L) + *to_ptr++ = value38; + + if ((out_mask & (1L << 39)) != 0L) + *to_ptr++ = value39; + } + + return ( value00 + value01 + value02 + value03 + value04 + + value05 + value06 + value07 + value08 + value09 + + value10 + value11 + value12 + value13 + value14 + + value15 + value16 + value17 + value18 + value19 + + value20 + value21 + value22 + value23 + value24 + + value25 + value26 + value27 + value28 + value29 + + value30 + value31 + value32 + value33 + value34 + + value35 + value36 + value37 + value38 + value39); } /* { dg-final { scan-assembler "lxsspx" } } */ /* { dg-final { scan-assembler "lxsdx" } } */ /* { dg-final { scan-assembler "stxsspx" } } */ /* { dg-final { scan-assembler "stxsdx" } } */ +/* { dg-final { scan-assembler "xsaddsp" } } */ +/* { dg-final { scan-assembler "xsadddp" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c index 8a6cc08b909..ea20f60e1a9 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c @@ -1,13 +1,16 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mcpu=power7 -ffast-math" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O2 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */ /* { dg-final { scan-assembler-times "lfiwax" 2 } } */ /* { dg-final { scan-assembler-times "lfiwzx" 2 } } */ -/* { dg-final { scan-assembler-times "fcfids" 3 } } */ -/* { dg-final { scan-assembler-times "fcfidus" 1 } } */ -/* { dg-final { scan-assembler-times "xscvsxddp" 3 } } */ -/* { dg-final { scan-assembler-times "xscvuxddp" 1 } } */ +/* { dg-final { scan-assembler-times "fcfids " 3 } } */ +/* { dg-final { scan-assembler-times "fcfidus " 1 } } */ +/* { dg-final { scan-assembler-times "fcfid 
" 3 } } */ +/* { dg-final { scan-assembler-times "fcfidu " 1 } } */ +/* { dg-final { scan-assembler-not "xscvdpsxds" } } */ +/* { dg-final { scan-assembler-not "xscvdpuxds" } } */ void int_to_float (float *dest, int *src) { diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c index 59ba5f91f48..11628c91840 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c @@ -1,9 +1,9 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mcpu=power7 -ffast-math" } */ -/* { dg-final { scan-assembler "xsrdpiz" } } */ -/* { dg-final { scan-assembler-not "friz" } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O2 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */ +/* { dg-final { scan-assembler "friz" } } */ double round_double_llong (double a) { diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c index e0a83422593..572dec628d6 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c @@ -1,12 +1,14 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ /* { dg-options "-O2 -mcpu=power6 -ffast-math" } */ /* { dg-final { scan-assembler-times "lfiwax" 2 } } */ /* { dg-final { scan-assembler-not "lfiwzx" } } */ /* { dg-final { scan-assembler-times "fcfid " 10 } } */ -/* { dg-final { scan-assembler-not "fcfids" } } */ -/* { dg-final { scan-assembler-not "fcfidus" } } */ +/* { dg-final { scan-assembler-not "fcfids " } } */ +/* { dg-final { 
scan-assembler-not "fcfidus " } } */ +/* { dg-final { scan-assembler-not "fcfidu " } } */ /* { dg-final { scan-assembler-not "xscvsxddp" } } */ /* { dg-final { scan-assembler-not "xscvuxddp" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c index bf12113d28c..984d3f3f07c 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c @@ -2,14 +2,16 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ /* { dg-require-effective-target powerpc_fprs } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ /* { dg-options "-O2 -mcpu=power5 -ffast-math" } */ /* { dg-final { scan-assembler-not "lfiwax" } } */ /* { dg-final { scan-assembler-not "lfiwzx" } } */ /* { dg-final { scan-assembler-times "fcfid " 10 } } */ -/* { dg-final { scan-assembler-not "fcfids" } } */ -/* { dg-final { scan-assembler-not "fcfidus" } } */ -/* { dg-final { scan-assembler-not "xscvsxddp" } } */ -/* { dg-final { scan-assembler-not "xscvuxddp" } } */ +/* { dg-final { scan-assembler-not "fcfids " } } */ +/* { dg-final { scan-assembler-not "fcfidus " } } */ +/* { dg-final { scan-assembler-not "fcfidu " } } */ +/* { dg-final { scan-assembler-not "xscvsxddp " } } */ +/* { dg-final { scan-assembler-not "xscvuxddp " } } */ void int_to_float (float *dest, int *src) { diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c index c4b9ea69bf0..dc1f710321a 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c @@ -1,14 +1,16 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=750" } } */ /* { 
dg-options "-O2 -mcpu=750 -ffast-math" } */ /* { dg-final { scan-assembler-not "lfiwax" } } */ /* { dg-final { scan-assembler-not "lfiwzx" } } */ /* { dg-final { scan-assembler-not "fcfid " } } */ -/* { dg-final { scan-assembler-not "fcfids" } } */ -/* { dg-final { scan-assembler-not "fcfidus" } } */ -/* { dg-final { scan-assembler-not "xscvsxddp" } } */ -/* { dg-final { scan-assembler-not "xscvuxddp" } } */ +/* { dg-final { scan-assembler-not "fcfids " } } */ +/* { dg-final { scan-assembler-not "fcfidus " } } */ +/* { dg-final { scan-assembler-not "fcfidu " } } */ +/* { dg-final { scan-assembler-not "xscvsxddp " } } */ +/* { dg-final { scan-assembler-not "xscvuxddp " } } */ void int_to_float (float *dest, int *src) { diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c index a071fc12292..c44eb08231c 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c @@ -1,13 +1,14 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O3 -mcpu=power7 -ffast-math" } */ -/* { dg-final { scan-assembler-times "fctiwz" 2 } } */ -/* { dg-final { scan-assembler-times "fctiwuz" 2 } } */ -/* { dg-final { scan-assembler-times "fctidz" 1 } } */ -/* { dg-final { scan-assembler-times "fctiduz" 1 } } */ -/* { dg-final { scan-assembler-times "xscvdpsxds" 1 } } */ -/* { dg-final { scan-assembler-times "xscvdpuxds" 1 } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O3 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */ +/* { dg-final { scan-assembler-times "fctiwz " 2 } } */ +/* { dg-final { scan-assembler-times "fctiwuz " 2 } } */ +/* { dg-final { scan-assembler-times "fctidz " 2 } } */ +/* { dg-final { scan-assembler-times "fctiduz " 2 } } */ +/* { dg-final { 
scan-assembler-not "xscvdpsxds" } } */ +/* { dg-final { scan-assembler-not "xscvdpuxds" } } */ void float_to_int (int *dest, float src) { *dest = (int) src; } void double_to_int (int *dest, double src) { *dest = (int) src; } diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c index 09ee1885a17..5282a5a4293 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c @@ -1,11 +1,13 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ /* { dg-options "-O3 -mcpu=power6 -ffast-math" } */ -/* { dg-final { scan-assembler-times "fctiwz" 2 } } */ -/* { dg-final { scan-assembler-not "fctiwuz" } } */ -/* { dg-final { scan-assembler-times "fctidz" 8 } } */ -/* { dg-final { scan-assembler-not "fctiduz" } } */ +/* { dg-final { scan-assembler-times "fctiwz " 2 } } */ +/* { dg-final { scan-assembler-not "fctiwuz " } } */ +/* { dg-final { scan-assembler-times "fctidz " 8 } } */ +/* { dg-final { scan-assembler-not "fctiduz " } } */ +/* { dg-final { scan-assembler-not "fctidu " } } */ /* { dg-final { scan-assembler-not "xscvdpsxds" } } */ /* { dg-final { scan-assembler-not "xscvdpuxds" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c index 808cbc39078..fa0b50edb30 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c @@ -2,11 +2,13 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ /* { dg-require-effective-target powerpc_fprs } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */ /* { dg-options "-O3 -mcpu=power5 -ffast-math" } */ 
-/* { dg-final { scan-assembler-times "fctiwz" 2 } } */ -/* { dg-final { scan-assembler-not "fctiwuz" } } */ -/* { dg-final { scan-assembler-times "fctidz" 8 } } */ -/* { dg-final { scan-assembler-not "fctiduz" } } */ +/* { dg-final { scan-assembler-times "fctiwz " 2 } } */ +/* { dg-final { scan-assembler-not "fctiwuz " } } */ +/* { dg-final { scan-assembler-times "fctidz " 8 } } */ +/* { dg-final { scan-assembler-not "fctiduz " } } */ +/* { dg-final { scan-assembler-not "fctidu " } } */ /* { dg-final { scan-assembler-not "xscvdpsxds" } } */ /* { dg-final { scan-assembler-not "xscvdpuxds" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c index f841d7ee073..5f1bb23c50e 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c @@ -2,11 +2,13 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target ilp32 } */ /* { dg-require-effective-target powerpc_fprs } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=750" } } */ /* { dg-options "-O3 -mcpu=750 -ffast-math" } */ -/* { dg-final { scan-assembler-times "fctiwz" 6 } } */ -/* { dg-final { scan-assembler-not "fctiwuz" } } */ -/* { dg-final { scan-assembler-not "fctidz" } } */ -/* { dg-final { scan-assembler-not "fctiduz" } } */ +/* { dg-final { scan-assembler-times "fctiwz " 6 } } */ +/* { dg-final { scan-assembler-not "fctiwuz " } } */ +/* { dg-final { scan-assembler-not "fctidz " } } */ +/* { dg-final { scan-assembler-not "fctiduz " } } */ +/* { dg-final { scan-assembler-not "fctidu " } } */ /* { dg-final { scan-assembler-not "xscvdpsxds" } } */ /* { dg-final { scan-assembler-not "xscvdpuxds" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c index 836c030baa6..62ead0a9e4f 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c +++ 
b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c @@ -1,7 +1,9 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O3 -mcpu=power7 -ffast-math" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O3 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */ +/* { dg-final { scan-assembler-times "fctidz" 2 } } */ /* { dg-final { scan-assembler-not "lwz" } } */ /* { dg-final { scan-assembler-not "stw" } } */ /* { dg-final { scan-assembler-not "ld " } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-round.c b/gcc/testsuite/gcc.target/powerpc/ppc-round.c index 20262aa449a..4fc1679622d 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-round.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-round.c @@ -1,13 +1,14 @@ /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mcpu=power7" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O2 -mcpu=power7 -mno-upper-regs-df" } */ /* { dg-final { scan-assembler-times "stfiwx" 4 } } */ /* { dg-final { scan-assembler-times "lfiwax" 2 } } */ /* { dg-final { scan-assembler-times "lfiwzx" 2 } } */ -/* { dg-final { scan-assembler-times "fctiwz" 2 } } */ -/* { dg-final { scan-assembler-times "xscvsxddp" 2 } } */ -/* { dg-final { scan-assembler-times "fcfids" 2 } } */ +/* { dg-final { scan-assembler-times "fctiwz " 2 } } */ +/* { dg-final { scan-assembler-times "fctiwuz " 2 } } */ +/* { dg-final { scan-assembler-times "fcfids " 2 } } */ /* { dg-final { scan-assembler-not "lwz" } } */ /* { dg-final { scan-assembler-not "stw" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr64019.c b/gcc/testsuite/gcc.target/powerpc/pr64019.c new 
file mode 100644 index 00000000000..a39b2191798 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr64019.c @@ -0,0 +1,71 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-O2 -ffast-math -mcpu=power7" } */ + +#include <math.h> + +typedef struct +{ + double x, y, z; + double q, a, b, mass; + double vx, vy, vz, vw, dx, dy, dz; +} +ATOM; +int +u_f_nonbon (lambda) + double lambda; +{ + double r, r0, xt, yt, zt; + double lcutoff, cutoff, get_f_variable (); + double rdebye; + int inbond, inangle, i; + ATOM *a1, *a2, *bonded[10], *angled[10]; + ATOM *(*use)[]; + int uselist (), nuse, used; + ATOM *cp, *bp; + int a_number (), inbuffer; + double (*buffer)[], xx, yy, zz, k; + int invector, atomsused, ii, jj, imax; + double (*vector)[]; + ATOM *(*atms)[]; + double dielectric; + rdebye = cutoff / 2.; + dielectric = get_f_variable ("dielec"); + imax = a_number (); + for (jj = 1; jj < imax; jj++, a1 = bp) + { + if ((*use)[used] == a1) + { + used += 1; + } + while ((*use)[used] != a1) + { + for (i = 0; i < inbuffer; i++) + { + } + xx = a1->x + lambda * a1->dx; + yy = a1->y + lambda * a1->dy; + zz = a1->z + lambda * a1->dz; + for (i = 0; i < inbuffer; i++) + { + xt = xx - (*buffer)[3 * i]; + yt = yy - (*buffer)[3 * i + 1]; + zt = zz - (*buffer)[3 * i + 2]; + r = xt * xt + yt * yt + zt * zt; + r0 = sqrt (r); + xt = xt / r0; + zt = zt / r0; + k = + -a1->q * (*atms)[i]->q * dielectric * exp (-r0 / rdebye) * + (1. / (rdebye * r0) + 1. 
/ r); + k += a1->a * (*atms)[i]->a / r / r0 * 6; + k -= a1->b * (*atms)[i]->b / r / r / r0 * 12; + (*vector)[3 * i] = xt * k; + (*vector)[3 * i + 1] = yt * k; + (*vector)[3 * i + 2] = zt * k; + } + } + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c b/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c new file mode 100644 index 00000000000..e3a284ca0da --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c @@ -0,0 +1,726 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */ +/* { dg-options "-mcpu=power7 -O2 -mupper-regs-df" } */ + +/* Test for the -mupper-regs-df option to make sure double values are allocated + to the Altivec registers as well as the traditional FPR registers. */ + +#ifndef TYPE +#define TYPE double +#endif + +#ifndef MASK_TYPE +#define MASK_TYPE unsigned long long +#endif + +#define MASK_ONE ((MASK_TYPE)1) +#define ZERO ((TYPE) 0.0) + +TYPE +test_add (const MASK_TYPE *add_mask, const TYPE *add_values, + const MASK_TYPE *sub_mask, const TYPE *sub_values, + const MASK_TYPE *mul_mask, const TYPE *mul_values, + const MASK_TYPE *div_mask, const TYPE *div_values, + const MASK_TYPE *eq0_mask, int *eq0_ptr) +{ + TYPE value; + TYPE value00 = ZERO; + TYPE value01 = ZERO; + TYPE value02 = ZERO; + TYPE value03 = ZERO; + TYPE value04 = ZERO; + TYPE value05 = ZERO; + TYPE value06 = ZERO; + TYPE value07 = ZERO; + TYPE value08 = ZERO; + TYPE value09 = ZERO; + TYPE value10 = ZERO; + TYPE value11 = ZERO; + TYPE value12 = ZERO; + TYPE value13 = ZERO; + TYPE value14 = ZERO; + TYPE value15 = ZERO; + TYPE value16 = ZERO; + TYPE value17 = ZERO; + TYPE value18 = ZERO; + TYPE value19 = ZERO; + TYPE value20 = ZERO; + TYPE value21 = ZERO; + TYPE value22 = ZERO; + TYPE value23 = ZERO; + TYPE value24 = ZERO; + TYPE value25 = ZERO; + 
TYPE value26 = ZERO; + TYPE value27 = ZERO; + TYPE value28 = ZERO; + TYPE value29 = ZERO; + TYPE value30 = ZERO; + TYPE value31 = ZERO; + TYPE value32 = ZERO; + TYPE value33 = ZERO; + TYPE value34 = ZERO; + TYPE value35 = ZERO; + TYPE value36 = ZERO; + TYPE value37 = ZERO; + TYPE value38 = ZERO; + TYPE value39 = ZERO; + MASK_TYPE mask; + int eq0; + + while ((mask = *add_mask++) != 0) + { + value = *add_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 += value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 += value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 += value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 += value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 += value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 += value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 += value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 += value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 += value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 += value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 += value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 += value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 += value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 += value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 += value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 += value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 += value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 += value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 += value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 += value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 += value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 += value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 += value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 += value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 += value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 += 
value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 += value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 += value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 += value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 += value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 += value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 += value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 += value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 += value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 += value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 += value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 += value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 += value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 += value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 += value; + } + + while ((mask = *sub_mask++) != 0) + { + value = *sub_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 -= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 -= value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 -= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 -= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 -= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 -= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 -= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 -= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 -= value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 -= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 -= value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 -= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 -= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 -= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 -= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 -= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 -= value; + + if ((mask & (MASK_ONE << 
17)) != 0) + value17 -= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 -= value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 -= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 -= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 -= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 -= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 -= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 -= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 -= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 -= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 -= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 -= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 -= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 -= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 -= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 -= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 -= value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 -= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 -= value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 -= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 -= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 -= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 -= value; + } + + while ((mask = *mul_mask++) != 0) + { + value = *mul_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 *= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 *= value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 *= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 *= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 *= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 *= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 *= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 *= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 *= value; + + 
if ((mask & (MASK_ONE << 9)) != 0) + value09 *= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 *= value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 *= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 *= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 *= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 *= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 *= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 *= value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 *= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 *= value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 *= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 *= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 *= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 *= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 *= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 *= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 *= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 *= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 *= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 *= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 *= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 *= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 *= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 *= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 *= value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 *= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 *= value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 *= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 *= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 *= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 *= value; + } + + while ((mask = *div_mask++) != 0) + { + value = *div_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) 
!= 0) + value00 /= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 /= value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 /= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 /= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 /= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 /= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 /= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 /= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 /= value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 /= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 /= value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 /= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 /= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 /= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 /= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 /= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 /= value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 /= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 /= value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 /= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 /= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 /= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 /= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 /= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 /= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 /= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 /= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 /= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 /= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 /= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 /= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 /= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 /= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 /= value; + 
+ if ((mask & (MASK_ONE << 34)) != 0) + value34 /= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 /= value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 /= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 /= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 /= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 /= value; + } + + while ((mask = *eq0_mask++) != 0) + { + eq0 = 0; + + if ((mask & (MASK_ONE << 0)) != 0) + eq0 |= (value00 == ZERO); + + if ((mask & (MASK_ONE << 1)) != 0) + eq0 |= (value01 == ZERO); + + if ((mask & (MASK_ONE << 2)) != 0) + eq0 |= (value02 == ZERO); + + if ((mask & (MASK_ONE << 3)) != 0) + eq0 |= (value03 == ZERO); + + if ((mask & (MASK_ONE << 4)) != 0) + eq0 |= (value04 == ZERO); + + if ((mask & (MASK_ONE << 5)) != 0) + eq0 |= (value05 == ZERO); + + if ((mask & (MASK_ONE << 6)) != 0) + eq0 |= (value06 == ZERO); + + if ((mask & (MASK_ONE << 7)) != 0) + eq0 |= (value07 == ZERO); + + if ((mask & (MASK_ONE << 8)) != 0) + eq0 |= (value08 == ZERO); + + if ((mask & (MASK_ONE << 9)) != 0) + eq0 |= (value09 == ZERO); + + if ((mask & (MASK_ONE << 10)) != 0) + eq0 |= (value10 == ZERO); + + if ((mask & (MASK_ONE << 11)) != 0) + eq0 |= (value11 == ZERO); + + if ((mask & (MASK_ONE << 12)) != 0) + eq0 |= (value12 == ZERO); + + if ((mask & (MASK_ONE << 13)) != 0) + eq0 |= (value13 == ZERO); + + if ((mask & (MASK_ONE << 14)) != 0) + eq0 |= (value14 == ZERO); + + if ((mask & (MASK_ONE << 15)) != 0) + eq0 |= (value15 == ZERO); + + if ((mask & (MASK_ONE << 16)) != 0) + eq0 |= (value16 == ZERO); + + if ((mask & (MASK_ONE << 17)) != 0) + eq0 |= (value17 == ZERO); + + if ((mask & (MASK_ONE << 18)) != 0) + eq0 |= (value18 == ZERO); + + if ((mask & (MASK_ONE << 19)) != 0) + eq0 |= (value19 == ZERO); + + if ((mask & (MASK_ONE << 20)) != 0) + eq0 |= (value20 == ZERO); + + if ((mask & (MASK_ONE << 21)) != 0) + eq0 |= (value21 == ZERO); + + if ((mask & (MASK_ONE << 22)) != 0) + eq0 |= (value22 == ZERO); + + if ((mask & (MASK_ONE << 
23)) != 0) + eq0 |= (value23 == ZERO); + + if ((mask & (MASK_ONE << 24)) != 0) + eq0 |= (value24 == ZERO); + + if ((mask & (MASK_ONE << 25)) != 0) + eq0 |= (value25 == ZERO); + + if ((mask & (MASK_ONE << 26)) != 0) + eq0 |= (value26 == ZERO); + + if ((mask & (MASK_ONE << 27)) != 0) + eq0 |= (value27 == ZERO); + + if ((mask & (MASK_ONE << 28)) != 0) + eq0 |= (value28 == ZERO); + + if ((mask & (MASK_ONE << 29)) != 0) + eq0 |= (value29 == ZERO); + + if ((mask & (MASK_ONE << 30)) != 0) + eq0 |= (value30 == ZERO); + + if ((mask & (MASK_ONE << 31)) != 0) + eq0 |= (value31 == ZERO); + + if ((mask & (MASK_ONE << 32)) != 0) + eq0 |= (value32 == ZERO); + + if ((mask & (MASK_ONE << 33)) != 0) + eq0 |= (value33 == ZERO); + + if ((mask & (MASK_ONE << 34)) != 0) + eq0 |= (value34 == ZERO); + + if ((mask & (MASK_ONE << 35)) != 0) + eq0 |= (value35 == ZERO); + + if ((mask & (MASK_ONE << 36)) != 0) + eq0 |= (value36 == ZERO); + + if ((mask & (MASK_ONE << 37)) != 0) + eq0 |= (value37 == ZERO); + + if ((mask & (MASK_ONE << 38)) != 0) + eq0 |= (value38 == ZERO); + + if ((mask & (MASK_ONE << 39)) != 0) + eq0 |= (value39 == ZERO); + + *eq0_ptr++ = eq0; + } + + return ( value00 + value01 + value02 + value03 + value04 + + value05 + value06 + value07 + value08 + value09 + + value10 + value11 + value12 + value13 + value14 + + value15 + value16 + value17 + value18 + value19 + + value20 + value21 + value22 + value23 + value24 + + value25 + value26 + value27 + value28 + value29 + + value30 + value31 + value32 + value33 + value34 + + value35 + value36 + value37 + value38 + value39); +} + +/* { dg-final { scan-assembler "fadd" } } */ +/* { dg-final { scan-assembler "fsub" } } */ +/* { dg-final { scan-assembler "fmul" } } */ +/* { dg-final { scan-assembler "fdiv" } } */ +/* { dg-final { scan-assembler "fcmpu" } } */ +/* { dg-final { scan-assembler "xsadddp" } } */ +/* { dg-final { scan-assembler "xssubdp" } } */ +/* { dg-final { scan-assembler "xsmuldp" } } */ +/* { dg-final { scan-assembler 
"xsdivdp" } } */ +/* { dg-final { scan-assembler "xscmpudp" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c b/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c new file mode 100644 index 00000000000..401b5c16ffa --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c @@ -0,0 +1,726 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */ + +/* Test for the -mupper-regs-sf option to make sure float values are allocated + to the Altivec registers as well as the traditional FPR registers. */ + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef MASK_TYPE +#define MASK_TYPE unsigned long long +#endif + +#define MASK_ONE ((MASK_TYPE)1) +#define ZERO ((TYPE) 0.0) + +TYPE +test_add (const MASK_TYPE *add_mask, const TYPE *add_values, + const MASK_TYPE *sub_mask, const TYPE *sub_values, + const MASK_TYPE *mul_mask, const TYPE *mul_values, + const MASK_TYPE *div_mask, const TYPE *div_values, + const MASK_TYPE *eq0_mask, int *eq0_ptr) +{ + TYPE value; + TYPE value00 = ZERO; + TYPE value01 = ZERO; + TYPE value02 = ZERO; + TYPE value03 = ZERO; + TYPE value04 = ZERO; + TYPE value05 = ZERO; + TYPE value06 = ZERO; + TYPE value07 = ZERO; + TYPE value08 = ZERO; + TYPE value09 = ZERO; + TYPE value10 = ZERO; + TYPE value11 = ZERO; + TYPE value12 = ZERO; + TYPE value13 = ZERO; + TYPE value14 = ZERO; + TYPE value15 = ZERO; + TYPE value16 = ZERO; + TYPE value17 = ZERO; + TYPE value18 = ZERO; + TYPE value19 = ZERO; + TYPE value20 = ZERO; + TYPE value21 = ZERO; + TYPE value22 = ZERO; + TYPE value23 = ZERO; + TYPE value24 = ZERO; + TYPE value25 = ZERO; + TYPE value26 = ZERO; + TYPE value27 = ZERO; + TYPE value28 = ZERO; + TYPE value29 = ZERO; + TYPE value30 = ZERO; + TYPE 
value31 = ZERO; + TYPE value32 = ZERO; + TYPE value33 = ZERO; + TYPE value34 = ZERO; + TYPE value35 = ZERO; + TYPE value36 = ZERO; + TYPE value37 = ZERO; + TYPE value38 = ZERO; + TYPE value39 = ZERO; + MASK_TYPE mask; + int eq0; + + while ((mask = *add_mask++) != 0) + { + value = *add_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 += value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 += value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 += value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 += value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 += value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 += value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 += value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 += value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 += value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 += value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 += value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 += value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 += value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 += value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 += value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 += value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 += value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 += value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 += value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 += value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 += value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 += value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 += value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 += value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 += value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 += value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 += value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 += 
value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 += value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 += value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 += value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 += value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 += value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 += value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 += value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 += value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 += value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 += value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 += value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 += value; + } + + while ((mask = *sub_mask++) != 0) + { + value = *sub_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 -= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 -= value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 -= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 -= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 -= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 -= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 -= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 -= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 -= value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 -= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 -= value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 -= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 -= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 -= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 -= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 -= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 -= value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 -= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 -= value; + + if ((mask & (MASK_ONE << 
19)) != 0) + value19 -= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 -= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 -= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 -= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 -= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 -= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 -= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 -= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 -= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 -= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 -= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 -= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 -= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 -= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 -= value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 -= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 -= value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 -= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 -= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 -= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 -= value; + } + + while ((mask = *mul_mask++) != 0) + { + value = *mul_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 *= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 *= value; + + if ((mask & (MASK_ONE << 2)) != 0) + value02 *= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 *= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 *= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 *= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 *= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 *= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 *= value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 *= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 *= value; + + if 
((mask & (MASK_ONE << 11)) != 0) + value11 *= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 *= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 *= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 *= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 *= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 *= value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 *= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 *= value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 *= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 *= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 *= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 *= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 *= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 *= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 *= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 *= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 *= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 *= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 *= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 *= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 *= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 *= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 *= value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 *= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 *= value; + + if ((mask & (MASK_ONE << 36)) != 0) + value36 *= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 *= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 *= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 *= value; + } + + while ((mask = *div_mask++) != 0) + { + value = *div_values++; + + __asm__ (" #reg %0" : "+d" (value)); + + if ((mask & (MASK_ONE << 0)) != 0) + value00 /= value; + + if ((mask & (MASK_ONE << 1)) != 0) + value01 /= value; + + if ((mask & (MASK_ONE << 2)) != 
0) + value02 /= value; + + if ((mask & (MASK_ONE << 3)) != 0) + value03 /= value; + + if ((mask & (MASK_ONE << 4)) != 0) + value04 /= value; + + if ((mask & (MASK_ONE << 5)) != 0) + value05 /= value; + + if ((mask & (MASK_ONE << 6)) != 0) + value06 /= value; + + if ((mask & (MASK_ONE << 7)) != 0) + value07 /= value; + + if ((mask & (MASK_ONE << 8)) != 0) + value08 /= value; + + if ((mask & (MASK_ONE << 9)) != 0) + value09 /= value; + + if ((mask & (MASK_ONE << 10)) != 0) + value10 /= value; + + if ((mask & (MASK_ONE << 11)) != 0) + value11 /= value; + + if ((mask & (MASK_ONE << 12)) != 0) + value12 /= value; + + if ((mask & (MASK_ONE << 13)) != 0) + value13 /= value; + + if ((mask & (MASK_ONE << 14)) != 0) + value14 /= value; + + if ((mask & (MASK_ONE << 15)) != 0) + value15 /= value; + + if ((mask & (MASK_ONE << 16)) != 0) + value16 /= value; + + if ((mask & (MASK_ONE << 17)) != 0) + value17 /= value; + + if ((mask & (MASK_ONE << 18)) != 0) + value18 /= value; + + if ((mask & (MASK_ONE << 19)) != 0) + value19 /= value; + + if ((mask & (MASK_ONE << 20)) != 0) + value20 /= value; + + if ((mask & (MASK_ONE << 21)) != 0) + value21 /= value; + + if ((mask & (MASK_ONE << 22)) != 0) + value22 /= value; + + if ((mask & (MASK_ONE << 23)) != 0) + value23 /= value; + + if ((mask & (MASK_ONE << 24)) != 0) + value24 /= value; + + if ((mask & (MASK_ONE << 25)) != 0) + value25 /= value; + + if ((mask & (MASK_ONE << 26)) != 0) + value26 /= value; + + if ((mask & (MASK_ONE << 27)) != 0) + value27 /= value; + + if ((mask & (MASK_ONE << 28)) != 0) + value28 /= value; + + if ((mask & (MASK_ONE << 29)) != 0) + value29 /= value; + + if ((mask & (MASK_ONE << 30)) != 0) + value30 /= value; + + if ((mask & (MASK_ONE << 31)) != 0) + value31 /= value; + + if ((mask & (MASK_ONE << 32)) != 0) + value32 /= value; + + if ((mask & (MASK_ONE << 33)) != 0) + value33 /= value; + + if ((mask & (MASK_ONE << 34)) != 0) + value34 /= value; + + if ((mask & (MASK_ONE << 35)) != 0) + value35 /= value; + + 
if ((mask & (MASK_ONE << 36)) != 0) + value36 /= value; + + if ((mask & (MASK_ONE << 37)) != 0) + value37 /= value; + + if ((mask & (MASK_ONE << 38)) != 0) + value38 /= value; + + if ((mask & (MASK_ONE << 39)) != 0) + value39 /= value; + } + + while ((mask = *eq0_mask++) != 0) + { + eq0 = 0; + + if ((mask & (MASK_ONE << 0)) != 0) + eq0 |= (value00 == ZERO); + + if ((mask & (MASK_ONE << 1)) != 0) + eq0 |= (value01 == ZERO); + + if ((mask & (MASK_ONE << 2)) != 0) + eq0 |= (value02 == ZERO); + + if ((mask & (MASK_ONE << 3)) != 0) + eq0 |= (value03 == ZERO); + + if ((mask & (MASK_ONE << 4)) != 0) + eq0 |= (value04 == ZERO); + + if ((mask & (MASK_ONE << 5)) != 0) + eq0 |= (value05 == ZERO); + + if ((mask & (MASK_ONE << 6)) != 0) + eq0 |= (value06 == ZERO); + + if ((mask & (MASK_ONE << 7)) != 0) + eq0 |= (value07 == ZERO); + + if ((mask & (MASK_ONE << 8)) != 0) + eq0 |= (value08 == ZERO); + + if ((mask & (MASK_ONE << 9)) != 0) + eq0 |= (value09 == ZERO); + + if ((mask & (MASK_ONE << 10)) != 0) + eq0 |= (value10 == ZERO); + + if ((mask & (MASK_ONE << 11)) != 0) + eq0 |= (value11 == ZERO); + + if ((mask & (MASK_ONE << 12)) != 0) + eq0 |= (value12 == ZERO); + + if ((mask & (MASK_ONE << 13)) != 0) + eq0 |= (value13 == ZERO); + + if ((mask & (MASK_ONE << 14)) != 0) + eq0 |= (value14 == ZERO); + + if ((mask & (MASK_ONE << 15)) != 0) + eq0 |= (value15 == ZERO); + + if ((mask & (MASK_ONE << 16)) != 0) + eq0 |= (value16 == ZERO); + + if ((mask & (MASK_ONE << 17)) != 0) + eq0 |= (value17 == ZERO); + + if ((mask & (MASK_ONE << 18)) != 0) + eq0 |= (value18 == ZERO); + + if ((mask & (MASK_ONE << 19)) != 0) + eq0 |= (value19 == ZERO); + + if ((mask & (MASK_ONE << 20)) != 0) + eq0 |= (value20 == ZERO); + + if ((mask & (MASK_ONE << 21)) != 0) + eq0 |= (value21 == ZERO); + + if ((mask & (MASK_ONE << 22)) != 0) + eq0 |= (value22 == ZERO); + + if ((mask & (MASK_ONE << 23)) != 0) + eq0 |= (value23 == ZERO); + + if ((mask & (MASK_ONE << 24)) != 0) + eq0 |= (value24 == ZERO); + + if ((mask & 
(MASK_ONE << 25)) != 0) + eq0 |= (value25 == ZERO); + + if ((mask & (MASK_ONE << 26)) != 0) + eq0 |= (value26 == ZERO); + + if ((mask & (MASK_ONE << 27)) != 0) + eq0 |= (value27 == ZERO); + + if ((mask & (MASK_ONE << 28)) != 0) + eq0 |= (value28 == ZERO); + + if ((mask & (MASK_ONE << 29)) != 0) + eq0 |= (value29 == ZERO); + + if ((mask & (MASK_ONE << 30)) != 0) + eq0 |= (value30 == ZERO); + + if ((mask & (MASK_ONE << 31)) != 0) + eq0 |= (value31 == ZERO); + + if ((mask & (MASK_ONE << 32)) != 0) + eq0 |= (value32 == ZERO); + + if ((mask & (MASK_ONE << 33)) != 0) + eq0 |= (value33 == ZERO); + + if ((mask & (MASK_ONE << 34)) != 0) + eq0 |= (value34 == ZERO); + + if ((mask & (MASK_ONE << 35)) != 0) + eq0 |= (value35 == ZERO); + + if ((mask & (MASK_ONE << 36)) != 0) + eq0 |= (value36 == ZERO); + + if ((mask & (MASK_ONE << 37)) != 0) + eq0 |= (value37 == ZERO); + + if ((mask & (MASK_ONE << 38)) != 0) + eq0 |= (value38 == ZERO); + + if ((mask & (MASK_ONE << 39)) != 0) + eq0 |= (value39 == ZERO); + + *eq0_ptr++ = eq0; + } + + return ( value00 + value01 + value02 + value03 + value04 + + value05 + value06 + value07 + value08 + value09 + + value10 + value11 + value12 + value13 + value14 + + value15 + value16 + value17 + value18 + value19 + + value20 + value21 + value22 + value23 + value24 + + value25 + value26 + value27 + value28 + value29 + + value30 + value31 + value32 + value33 + value34 + + value35 + value36 + value37 + value38 + value39); +} + +/* { dg-final { scan-assembler "fadds" } } */ +/* { dg-final { scan-assembler "fsubs" } } */ +/* { dg-final { scan-assembler "fmuls" } } */ +/* { dg-final { scan-assembler "fdivs" } } */ +/* { dg-final { scan-assembler "fcmpu" } } */ +/* { dg-final { scan-assembler "xsaddsp" } } */ +/* { dg-final { scan-assembler "xssubsp" } } */ +/* { dg-final { scan-assembler "xsmulsp" } } */ +/* { dg-final { scan-assembler "xsdivsp" } } */ +/* { dg-final { scan-assembler "xscmpudp" } } */ |