diff options
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r-- | gcc/config/rs6000/altivec.h | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/altivec.md | 55 | ||||
-rw-r--r-- | gcc/config/rs6000/paired.md | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/ppc-auxv.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/predicates.md | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-builtin.def | 22 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-c.c | 121 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-cpus.def | 31 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 721 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 11 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 67 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 31 | ||||
-rw-r--r-- | gcc/config/rs6000/rtems.h | 206 | ||||
-rw-r--r-- | gcc/config/rs6000/sysv4.h | 46 | ||||
-rw-r--r-- | gcc/config/rs6000/t-rtems | 6 | ||||
-rw-r--r-- | gcc/config/rs6000/vector.md | 14 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 316 |
18 files changed, 881 insertions, 784 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 71cdca523df..c8e508cf0a0 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -355,6 +355,7 @@ #define vec_vsx_ld __builtin_vec_vsx_ld #define vec_vsx_st __builtin_vec_vsx_st #define vec_xl __builtin_vec_vsx_ld +#define vec_xl_be __builtin_vec_xl_be #define vec_xst __builtin_vec_vsx_st /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions @@ -449,6 +450,9 @@ #define vec_insert_exp __builtin_vec_insert_exp #define vec_test_data_class __builtin_vec_test_data_class +#define vec_extract_fp_from_shorth __builtin_vec_vextract_fp_from_shorth +#define vec_extract_fp_from_shortl __builtin_vec_vextract_fp_from_shortl + #define scalar_extract_exp __builtin_vec_scalar_extract_exp #define scalar_extract_sig __builtin_vec_scalar_extract_sig #define scalar_insert_exp __builtin_vec_scalar_insert_exp diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 91c56512308..4077afdadb6 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -36,10 +36,14 @@ UNSPEC_VMULESB UNSPEC_VMULEUH UNSPEC_VMULESH + UNSPEC_VMULEUW + UNSPEC_VMULESW UNSPEC_VMULOUB UNSPEC_VMULOSB UNSPEC_VMULOUH UNSPEC_VMULOSH + UNSPEC_VMULOUW + UNSPEC_VMULOSW UNSPEC_VPKPX UNSPEC_VPACK_SIGN_SIGN_SAT UNSPEC_VPACK_SIGN_UNS_SAT @@ -307,7 +311,7 @@ for (i = 0; i < num_elements; i++) RTVEC_ELT (v, i) = constm1_rtx; - emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v))); + emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v))); emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest))); DONE; }) @@ -1538,6 +1542,41 @@ "vmulosh %0,%1,%2" [(set_attr "type" "veccomplex")]) +(define_insn "altivec_vmuleuw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULEUW))] + "TARGET_P8_VECTOR" + "vmuleuw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulouw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOUW))] + "TARGET_P8_VECTOR" + "vmulouw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulesw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULESW))] + "TARGET_P8_VECTOR" + "vmulesw %0,%1,%2" + [(set_attr "type" "veccomplex")]) + +(define_insn "altivec_vmulosw" + [(set (match_operand:V2DI 0 "register_operand" "=v") + (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")] + UNSPEC_VMULOSW))] + "TARGET_P8_VECTOR" + "vmulosw %0,%1,%2" + [(set_attr "type" "veccomplex")]) ;; Vector pack/unpack (define_insn "altivec_vpkpx" @@ -2228,7 +2267,7 @@ RTVEC_ELT (v, 2) = GEN_INT (mask_val); RTVEC_ELT (v, 3) = GEN_INT (mask_val); - emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v))); + emit_insn (gen_vec_initv4sisi (mask, gen_rtx_PARALLEL (V4SImode, v))); emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2], gen_lowpart (V4SFmode, mask))); DONE; @@ -3370,7 +3409,7 @@ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0); RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); DONE; }") @@ -3406,7 +3445,7 @@ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17); RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); DONE; }") @@ -3442,7 +3481,7 @@ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8); RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask)); DONE; }") @@ -3478,7 +3517,7 @@ RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17); RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask)); DONE; }") @@ -3719,7 +3758,7 @@ = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i); } - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2])); emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2])); emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask)); @@ -3765,7 +3804,7 @@ RTVEC_ELT (v, i + j * size) = GEN_INT (i + (num_elements - 1 - j) * size); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], operands[1], mask)); DONE; diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md index c9f95867c0f..b0aa329d7b8 100644 --- a/gcc/config/rs6000/paired.md +++ b/gcc/config/rs6000/paired.md @@ -377,7 +377,7 @@ "ps_muls1 %0, %1, %2" [(set_attr "type" "fp")]) -(define_expand "vec_initv2sf" +(define_expand "vec_initv2sfsf" [(match_operand:V2SF 0 "gpc_reg_operand" "=f") (match_operand 1 "" "")] "TARGET_PAIRED_FLOAT" diff --git a/gcc/config/rs6000/ppc-auxv.h b/gcc/config/rs6000/ppc-auxv.h index c7e2e0bfbf2..dcee28a8152 100644 --- a/gcc/config/rs6000/ppc-auxv.h +++ b/gcc/config/rs6000/ppc-auxv.h @@ -89,6 +89,8 @@ #define PPC_FEATURE2_HTM_NOSC 0x01000000 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #define PPC_FEATURE2_HAS_IEEE128 0x00400000 +#define PPC_FEATURE2_DARN 0x00200000 +#define PPC_FEATURE2_SCV 0x00100000 /* Thread Control Block (TCB) offsets of the AT_PLATFORM, AT_HWCAP and diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index aa1c01b93dd..466f9131aa0 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -783,10 +783,8 @@ (and (and (match_code "mem") (match_test "MEM_VOLATILE_P (op)")) (if_then_else (match_test "reload_completed") - (match_operand 0 "memory_operand") - (if_then_else (match_test "reload_in_progress") - (match_test "strict_memory_address_p (mode, XEXP (op, 0))") - (match_test "memory_address_p (mode, XEXP (op, 0))"))))) + (match_operand 0 "memory_operand") + (match_test "memory_address_p (mode, XEXP (op, 0))")))) ;; Return 1 if the operand is an offsettable memory operand. (define_predicate "offsettable_mem_operand" @@ -1142,7 +1140,7 @@ if (! volatile_ok && MEM_VOLATILE_P (op)) return 0; - if (reload_in_progress || lra_in_progress || reload_completed) + if (lra_in_progress || reload_completed) return indexed_or_indirect_address (addr, vmode); else return memory_address_addr_space_p (vmode, addr, MEM_ADDR_SPACE (op)); diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index bf2c90b49fb..850164a0987 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1031,10 +1031,14 @@ BU_ALTIVEC_2 (VMULEUB, "vmuleub", CONST, vec_widen_umult_even_v16qi) BU_ALTIVEC_2 (VMULESB, "vmulesb", CONST, vec_widen_smult_even_v16qi) BU_ALTIVEC_2 (VMULEUH, "vmuleuh", CONST, vec_widen_umult_even_v8hi) BU_ALTIVEC_2 (VMULESH, "vmulesh", CONST, vec_widen_smult_even_v8hi) +BU_ALTIVEC_2 (VMULEUW, "vmuleuw", CONST, altivec_vmuleuw) +BU_ALTIVEC_2 (VMULESW, "vmulesw", CONST, altivec_vmulesw) BU_ALTIVEC_2 (VMULOUB, "vmuloub", CONST, vec_widen_umult_odd_v16qi) BU_ALTIVEC_2 (VMULOSB, "vmulosb", CONST, vec_widen_smult_odd_v16qi) BU_ALTIVEC_2 (VMULOUH, "vmulouh", CONST, vec_widen_umult_odd_v8hi) BU_ALTIVEC_2 (VMULOSH, "vmulosh", CONST, vec_widen_smult_odd_v8hi) +BU_ALTIVEC_2 (VMULOUW, "vmulouw", CONST, altivec_vmulouw) +BU_ALTIVEC_2 (VMULOSW, "vmulosw", CONST, altivec_vmulosw) BU_ALTIVEC_2 (VNOR, "vnor", CONST, norv4si3) BU_ALTIVEC_2 (VOR, "vor", CONST, iorv4si3) BU_ALTIVEC_2 (VPKUHUM, "vpkuhum", CONST, altivec_vpkuhum) @@ -1353,12 +1357,16 @@ BU_ALTIVEC_OVERLOAD_2 (VMRGLH, "vmrglh") BU_ALTIVEC_OVERLOAD_2 (VMRGLW, "vmrglw") BU_ALTIVEC_OVERLOAD_2 (VMULESB, "vmulesb") BU_ALTIVEC_OVERLOAD_2 (VMULESH, "vmulesh") +BU_ALTIVEC_OVERLOAD_2 (VMULESW, "vmulesw") BU_ALTIVEC_OVERLOAD_2 (VMULEUB, "vmuleub") BU_ALTIVEC_OVERLOAD_2 (VMULEUH, "vmuleuh") +BU_ALTIVEC_OVERLOAD_2 (VMULEUW, "vmuleuw") BU_ALTIVEC_OVERLOAD_2 (VMULOSB, "vmulosb") BU_ALTIVEC_OVERLOAD_2 (VMULOSH, "vmulosh") +BU_ALTIVEC_OVERLOAD_2 (VMULOSW, "vmulosw") BU_ALTIVEC_OVERLOAD_2 (VMULOUB, "vmuloub") BU_ALTIVEC_OVERLOAD_2 (VMULOUH, "vmulouh") +BU_ALTIVEC_OVERLOAD_2 (VMULOUW, "vmulouw") BU_ALTIVEC_OVERLOAD_2 (VPKSHSS, "vpkshss") BU_ALTIVEC_OVERLOAD_2 (VPKSHUS, "vpkshus") BU_ALTIVEC_OVERLOAD_2 (VPKSWSS, "vpkswss") @@ -1727,6 +1735,14 @@ BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM) BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) + +BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM) +BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM) +BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM) +BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM) +BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM) +BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM) + BU_VSX_X (STXSDX, "stxsdx", MEM) BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM) BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM) @@ -1827,6 +1843,7 @@ BU_VSX_OVERLOAD_1 (VUNSIGNEDO, "vunsignedo") BU_VSX_OVERLOAD_X (LD, "ld") BU_VSX_OVERLOAD_X (ST, "st") BU_VSX_OVERLOAD_X (XL, "xl") +BU_VSX_OVERLOAD_X (XL_BE, "xl_be") BU_VSX_OVERLOAD_X (XST, "xst") /* 2 argument CMPB instructions added in ISA 2.05. */ @@ -2058,6 +2075,9 @@ BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") BU_P9V_OVERLOAD_1 (REVB, "revb") +BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth") +BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl") + /* ISA 3.0 vector scalar overloaded 2 argument functions. */ BU_P9V_OVERLOAD_2 (VSIEDP, "scalar_insert_exp") @@ -2076,6 +2096,8 @@ BU_P9V_VSX_1 (VEEDP, "extract_exp_dp", CONST, xvxexpdp) BU_P9V_VSX_1 (VEESP, "extract_exp_sp", CONST, xvxexpsp) BU_P9V_VSX_1 (VESDP, "extract_sig_dp", CONST, xvxsigdp) BU_P9V_VSX_1 (VESSP, "extract_sig_sp", CONST, xvxsigsp) +BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth", CONST, vextract_fp_from_shorth) +BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl", CONST, vextract_fp_from_shortl) /* 2 argument vsx vector functions added in ISA 3.0 (power9). */ BU_P9V_VSX_2 (VIEDP, "insert_exp_dp", CONST, xviexpdp) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 2a361260759..11febbb4d46 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -575,40 +575,6 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, 2. If TARGET_ALTIVEC is turned off. */ if ((flags & OPTION_MASK_CRYPTO) != 0) rs6000_define_or_undefine_macro (define_p, "__CRYPTO__"); - /* Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically - turned on in the following conditions: - 1. If TARGET_UPPER_REGS is explicitly turned on and - TARGET_VSX is turned on and OPTION_MASK_UPPER_REGS_DF is not - explicitly turned off. Hereafter, the - OPTION_MASK_UPPER_REGS_DF flag is considered to have been - explicitly set. - Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically - turned off in the following conditions: - 1. If TARGET_UPPER_REGS is explicitly turned off and TARGET_VSX - is turned on and OPTION_MASK_UPPER_REGS_DF is not explicitly - turned on. Hereafter, the OPTION_MASK_UPPER_REGS_DF flag is - considered to have been explicitly cleared. - 2. If TARGET_UPPER_REGS_DF is turned on but TARGET_VSX is turned - off. */ - if ((flags & OPTION_MASK_UPPER_REGS_DF) != 0) - rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_DF__"); - /* Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically - turned on in the following conditions: - 1. If TARGET_UPPER_REGS is explicitly turned on and - TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not - turned off explicitly. Hereafter, the - OPTION_MASK_UPPER_REGS_SF flag is considered to have been - explicitly set. - Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically - turned off in the following conditions: - 1. If TARGET_UPPER_REGS is explicitly turned off and - TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not - turned off explicitly. Hereafter, the - OPTION_MASK_UPPER_REGS_SF flag is considered to have been - explicitly cleared. - 2. If TARGET_P8_VECTOR is off. */ - if ((flags & OPTION_MASK_UPPER_REGS_SF) != 0) - rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_SF__"); /* options from the builtin masks. */ /* Note that RS6000_BTM_PAIRED is enabled only if @@ -2232,9 +2198,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESW, RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH, + { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB, @@ -2251,9 +2217,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSW, RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, - { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH, + { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUW, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH, @@ -3111,6 +3077,26 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { ~RS6000_BTI_unsigned_V16QI, 0 }, { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -5184,6 +5170,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTL, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTL, + RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, RS6000_BTI_INTQI, RS6000_BTI_UINTSI, RS6000_BTI_V16QI, 0 }, @@ -5881,6 +5872,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, tree arg1 = (*arglist)[1]; tree arg1_type = TREE_TYPE (arg1); + /* Both arguments must be vectors and the types must be compatible. */ + if (TREE_CODE (arg0_type) != VECTOR_TYPE) + goto bad; + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) + goto bad; + /* Power9 instructions provide the most efficient implementation of ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode or SFmode or DFmode. */ @@ -5890,12 +5887,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode) || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode)) { - /* Both arguments must be vectors and the types must be compatible. */ - if (TREE_CODE (arg0_type) != VECTOR_TYPE) - goto bad; - if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)) - goto bad; - switch (TYPE_MODE (TREE_TYPE (arg0_type))) { /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb), @@ -5960,8 +5951,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, __int128) and the types must be compatible. */ if (TREE_CODE (arg0_type) != VECTOR_TYPE) goto bad; - if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) || - !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) goto bad; switch (TYPE_MODE (TREE_TYPE (arg0_type))) @@ -6043,8 +6034,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, __int128) and the types must be compatible. */ if (TREE_CODE (arg0_type) != VECTOR_TYPE) goto bad; - if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) || - !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) + || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) goto bad; switch (TYPE_MODE (TREE_TYPE (arg0_type))) @@ -6493,6 +6484,9 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, /* Strip qualifiers like "const" from the pointer arg. */ tree arg1_type = TREE_TYPE (arg1); + if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE) + goto bad; + tree inner_type = TREE_TYPE (arg1_type); if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0) { @@ -6581,11 +6575,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, arg2 = build1 (ADDR_EXPR, arg2_type, arg2_elt0); } - tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, - arg2, arg1); - tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr, - build_int_cst (arg2_type, -16)); - /* Find the built-in to make sure a compatible one exists; if not we fall back to default handling to get the error message. */ for (desc = altivec_overloaded_builtins; @@ -6598,6 +6587,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, && rs6000_builtin_type_compatible (TREE_TYPE (arg2), desc->op3)) { + tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, + arg2, arg1); + tree aligned + = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, + addr, build_int_cst (arg2_type, -16)); + tree arg0_type = TREE_TYPE (arg0); if (TYPE_MODE (arg0_type) == V2DImode) /* Type-based aliasing analysis thinks vector long @@ -6723,8 +6718,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, overloaded_code = P6_BUILTIN_CMPB_32; } - while (desc->code && desc->code == fcode && - desc->overloaded_code != overloaded_code) + while (desc->code && desc->code == fcode + && desc->overloaded_code != overloaded_code) desc++; if (desc->code && (desc->code == fcode) @@ -6770,8 +6765,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, else overloaded_code = P9V_BUILTIN_VSIEDP; } - while (desc->code && desc->code == fcode && - desc->overloaded_code != overloaded_code) + while (desc->code && desc->code == fcode + && desc->overloaded_code != overloaded_code) desc++; if (desc->code && (desc->code == fcode) && rs6000_builtin_type_compatible (types[0], desc->op1) @@ -6807,15 +6802,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (unsupported_builtin) { const char *name = rs6000_overloaded_builtin_name (fcode); - error ("Builtin function %s not supported in this compiler configuration", + error ("builtin function %s not supported in this compiler configuration", name); return error_mark_node; } } bad: - { - const char *name = rs6000_overloaded_builtin_name (fcode); - error ("invalid parameter combination for AltiVec intrinsic %s", name); - return error_mark_node; - } + { + const char *name = rs6000_overloaded_builtin_name (fcode); + error ("invalid parameter combination for AltiVec intrinsic %s", name); + return error_mark_node; + } } diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index cd5c70688d8..190f9123fa0 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -44,9 +44,7 @@ #define ISA_2_6_MASKS_SERVER (ISA_2_5_MASKS_SERVER \ | OPTION_MASK_POPCNTD \ | OPTION_MASK_ALTIVEC \ - | OPTION_MASK_VSX \ - | OPTION_MASK_UPPER_REGS_DI \ - | OPTION_MASK_UPPER_REGS_DF) + | OPTION_MASK_VSX) /* For now, don't provide an embedded version of ISA 2.07. */ #define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ @@ -57,9 +55,7 @@ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ | OPTION_MASK_HTM \ | OPTION_MASK_QUAD_MEMORY \ - | OPTION_MASK_QUAD_MEMORY_ATOMIC \ - | OPTION_MASK_UPPER_REGS_SF \ - | OPTION_MASK_VSX_SMALL_INTEGER) + | OPTION_MASK_QUAD_MEMORY_ATOMIC) /* Add ISEL back into ISA 3.0, since it is supposed to be a win. Do not add FLOAT128_HW here until we are ready to make -mfloat128 on by default. */ @@ -78,11 +74,7 @@ #define ISA_3_0_MASKS_IEEE (OPTION_MASK_VSX \ | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_P9_VECTOR \ - | OPTION_MASK_DIRECT_MOVE \ - | OPTION_MASK_UPPER_REGS_DI \ - | OPTION_MASK_UPPER_REGS_DF \ - | OPTION_MASK_UPPER_REGS_SF \ - | OPTION_MASK_VSX_SMALL_INTEGER) + | OPTION_MASK_DIRECT_MOVE) /* Flags that need to be turned off if -mno-power9-vector. */ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -94,8 +86,7 @@ #define OTHER_P8_VECTOR_MASKS (OTHER_P9_VECTOR_MASKS \ | OPTION_MASK_P9_VECTOR \ | OPTION_MASK_DIRECT_MOVE \ - | OPTION_MASK_CRYPTO \ - | OPTION_MASK_UPPER_REGS_SF) \ + | OPTION_MASK_CRYPTO) /* Flags that need to be turned off if -mno-vsx. */ #define OTHER_VSX_VECTOR_MASKS (OTHER_P8_VECTOR_MASKS \ @@ -103,9 +94,6 @@ | OPTION_MASK_FLOAT128_KEYWORD \ | OPTION_MASK_FLOAT128_TYPE \ | OPTION_MASK_P8_VECTOR \ - | OPTION_MASK_UPPER_REGS_DI \ - | OPTION_MASK_UPPER_REGS_DF \ - | OPTION_MASK_VSX_SMALL_INTEGER \ | OPTION_MASK_VSX_TIMODE) #define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC) @@ -135,7 +123,6 @@ | OPTION_MASK_FPRND \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ - | OPTION_MASK_LRA \ | OPTION_MASK_MFCRF \ | OPTION_MASK_MFPGPR \ | OPTION_MASK_MODULO \ @@ -160,11 +147,7 @@ | OPTION_MASK_SOFT_FLOAT \ | OPTION_MASK_STRICT_ALIGN_OPTIONAL \ | OPTION_MASK_TOC_FUSION \ - | OPTION_MASK_UPPER_REGS_DI \ - | OPTION_MASK_UPPER_REGS_DF \ - | OPTION_MASK_UPPER_REGS_SF \ | OPTION_MASK_VSX \ - | OPTION_MASK_VSX_SMALL_INTEGER \ | OPTION_MASK_VSX_TIMODE) #endif @@ -251,11 +234,7 @@ RS6000_CPU ("power6", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT RS6000_CPU ("power6x", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_MFPGPR | MASK_RECIP_PRECISION) -RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */ - POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF - | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF - | OPTION_MASK_UPPER_REGS_DI) +RS6000_CPU ("power7", PROCESSOR_POWER7, MASK_POWERPC64 | ISA_2_6_MASKS_SERVER) RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER) RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER) RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0) diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index aeec9b2f1c2..144bdb26fa4 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -151,10 +151,10 @@ extern rtx rs6000_longcall_ref (rtx); extern void rs6000_fatal_bad_address (rtx); extern rtx create_TOC_reference (rtx, rtx); extern void rs6000_split_multireg_move (rtx, rtx); +extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode); extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode); extern bool valid_sf_si_move (rtx, rtx, machine_mode); extern void rs6000_emit_move (rtx, rtx, machine_mode); -extern rtx rs6000_secondary_memory_needed_rtx (machine_mode); extern machine_mode rs6000_secondary_memory_needed_mode (machine_mode); extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int, int, int *); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 988926b8d59..74158cdd075 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -143,10 +143,6 @@ typedef struct GTY(()) machine_function /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4 varargs save area. */ HOST_WIDE_INT varargs_save_offset; - /* Temporary stack slot to use for SDmode copies. This slot is - 64-bits wide and is allocated early enough so that the offset - does not overflow the 16-bit load/store offset field. */ - rtx sdmode_stack_slot; /* Alternative internal arg pointer for -fsplit-stack. */ rtx split_stack_arg_pointer; bool split_stack_argp_used; @@ -379,7 +375,9 @@ static const struct { "tar", PPC_FEATURE2_HAS_TAR, 1 }, { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 }, - { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 } + { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }, + { "darn", PPC_FEATURE2_DARN, 1 }, + { "scv", PPC_FEATURE2_SCV, 1 } }; /* On PowerPC, we have a limited number of target clones that we care about @@ -437,7 +435,7 @@ enum rs6000_reg_type { ALTIVEC_REG_TYPE, FPR_REG_TYPE, SPR_REG_TYPE, - CR_REG_TYPE, + CR_REG_TYPE }; /* Map register class to register type. */ @@ -1872,12 +1870,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_BUILTIN_RECIPROCAL #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal -#undef TARGET_EXPAND_TO_RTL_HOOK -#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot - -#undef TARGET_INSTANTIATE_DECLS -#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls - #undef TARGET_SECONDARY_RELOAD #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload @@ -1887,9 +1879,6 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_MODE_DEPENDENT_ADDRESS_P #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p -#undef TARGET_LRA_P -#define TARGET_LRA_P rs6000_lra_p - #undef TARGET_COMPUTE_PRESSURE_CLASSES #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes @@ -2104,14 +2093,11 @@ rs6000_hard_regno_mode_ok (int regno, machine_mode mode) if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) return 1; - if (TARGET_VSX_SMALL_INTEGER) - { - if (mode == SImode) - return 1; + if (TARGET_P8_VECTOR && (mode == SImode)) + return 1; - if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) - return 1; - } + if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) + return 1; } if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT @@ -2793,8 +2779,6 @@ rs6000_debug_reg_global (void) if (TARGET_LINK_STACK) fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); - fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false"); - if (TARGET_P8_FUSION) { char options[80]; @@ -2907,9 +2891,7 @@ rs6000_setup_reg_addr_masks (void) && !VECTOR_MODE_P (m2) && !FLOAT128_VECTOR_P (m2) && !complex_p - && !small_int_vsx_p - && (m2 != DFmode || !TARGET_UPPER_REGS_DF) - && (m2 != SFmode || !TARGET_UPPER_REGS_SF)) + && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -3218,22 +3200,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */ rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */ + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */ + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */ + rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */ if (TARGET_VSX_TIMODE) rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */ - - if (TARGET_UPPER_REGS_DF) /* DFmode */ - { - rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; - } - else - rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; - - if (TARGET_UPPER_REGS_DI) /* DImode */ - rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; - else - rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; } /* Add conditional constraints based on various options, to allow us to @@ -3263,7 +3235,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; } - if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */ + if (TARGET_P8_VECTOR) /* SFmode */ { rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; @@ -3303,7 +3275,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; /* Support small integers in VSX registers. */ - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) { rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS; rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS; @@ -3458,18 +3430,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - if (TARGET_UPPER_REGS_DF) - reg_addr[DFmode].scalar_in_vmx_p = true; - - if (TARGET_UPPER_REGS_DI) - reg_addr[DImode].scalar_in_vmx_p = true; + reg_addr[DFmode].scalar_in_vmx_p = true; + reg_addr[DImode].scalar_in_vmx_p = true; - if (TARGET_UPPER_REGS_SF) - reg_addr[SFmode].scalar_in_vmx_p = true; - - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) { + reg_addr[SFmode].scalar_in_vmx_p = true; reg_addr[SImode].scalar_in_vmx_p = true; + if (TARGET_P9_VECTOR) { reg_addr[HImode].scalar_in_vmx_p = true; @@ -4214,6 +4182,10 @@ rs6000_option_override_internal (bool global_init_p) rs6000_altivec_element_order = 0; } + if (!rs6000_fold_gimple) + fprintf (stderr, + "gimple folding of rs6000 builtins has been disabled.\n"); + /* Add some warnings for VSX. */ if (TARGET_VSX) { @@ -4277,20 +4249,12 @@ rs6000_option_override_internal (bool global_init_p) { if (cpu_index == PROCESSOR_POWER9) { - /* legacy behavior: allow -mcpu-power9 with certain + /* legacy behavior: allow -mcpu=power9 with certain capabilities explicitly disabled. */ rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); - /* However, reject this automatic fix if certain - capabilities required for TARGET_P9_MINMAX support - have been explicitly disabled. */ - if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF - | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags) - != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF - | OPTION_MASK_UPPER_REGS_DF)) - error ("-mpower9-minmax incompatible with explicitly disabled options"); - } + } else - error ("Power9 target option is incompatible with -mcpu=<xxx> for " + error ("power9 target option is incompatible with -mcpu=<xxx> for " "<xxx> less than power9"); } else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) @@ -4374,73 +4338,6 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_DFP; } - /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di, - and -mupper-regs-sf, depending on the cpu, unless the user explicitly also - set the individual option. */ - if (TARGET_UPPER_REGS > 0) - { - if (TARGET_VSX - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) - { - rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; - } - if (TARGET_VSX - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) - { - rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; - } - if (TARGET_P8_VECTOR - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) - { - rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; - } - } - else if (TARGET_UPPER_REGS == 0) - { - if (TARGET_VSX - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) - { - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; - } - if (TARGET_VSX - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) - { - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; - } - if (TARGET_P8_VECTOR - && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) - { - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; - rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; - } - } - - if (TARGET_UPPER_REGS_DF && !TARGET_VSX) - { - if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) - error ("-mupper-regs-df requires -mvsx"); - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; - } - - if (TARGET_UPPER_REGS_DI && !TARGET_VSX) - { - if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI) - error ("-mupper-regs-di requires -mvsx"); - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; - } - - if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR) - { - if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) - error ("-mupper-regs-sf requires -mpower8-vector"); - rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; - } - /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, silently turn off quad memory mode. */ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) @@ -4649,53 +4546,10 @@ rs6000_option_override_internal (bool global_init_p) } } - if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF) - { - /* We prefer to not mention undocumented options in - error messages. However, if users have managed to select - power9-dform without selecting upper-regs-df, they - already know about undocumented flags. */ - if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) - error ("-mpower9-dform requires -mupper-regs-df"); - rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; - } - - if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF) - { - if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) - error ("-mpower9-dform requires -mupper-regs-sf"); - rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; - } - - /* Enable LRA by default. */ - if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0) - rs6000_isa_flags |= OPTION_MASK_LRA; - - /* There have been bugs with -mvsx-timode that don't show up with -mlra, - but do show up with -mno-lra. Given -mlra will become the default once - PR 69847 is fixed, turn off the options with problems by default if - -mno-lra was used, and warn if the user explicitly asked for the option. - - Enable -mpower9-dform-vector by default if LRA and other power9 options. - Enable -mvsx-timode by default if LRA and VSX. */ - if (!TARGET_LRA) - { - if (TARGET_VSX_TIMODE) - { - if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0) - warning (0, "-mvsx-timode might need -mlra"); - - else - rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; - } - } - - else - { - if (TARGET_VSX && !TARGET_VSX_TIMODE - && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) - rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; - } + /* Enable -mvsx-timode by default if VSX. */ + if (TARGET_VSX && !TARGET_VSX_TIMODE + && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) + rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 support. If we only have ISA 2.06 support, and the user did not specify @@ -4737,20 +4591,6 @@ rs6000_option_override_internal (bool global_init_p) } } - /* Check whether we should allow small integers into VSX registers. We - require direct move to prevent the register allocator from having to move - variables through memory to do moves. SImode can be used on ISA 2.07, - while HImode and QImode require ISA 3.0. */ - if (TARGET_VSX_SMALL_INTEGER - && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI)) - { - if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER) - error ("-mvsx-small-integer requires -mpower8-vector, " - "-mupper-regs-di, and -mdirect-move"); - - rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER; - } - /* Set long double size before the IEEE 128-bit tests. */ if (!global_options_set.x_rs6000_long_double_type_size) { @@ -5757,7 +5597,7 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, if (TARGET_P9_VECTOR) return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2; else - return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11; + return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5; } else /* V2DFmode doesn't need a direct move. */ @@ -7443,8 +7283,7 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) else if (mode == V2DImode) insn = gen_vsx_set_v2di (target, target, val, elt_rtx); - else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) + else if (TARGET_P9_VECTOR && TARGET_POWERPC64) { if (mode == V4SImode) insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx); @@ -8785,14 +8624,6 @@ legitimate_indexed_address_p (rtx x, int strict) op0 = XEXP (x, 0); op1 = XEXP (x, 1); - /* Recognize the rtl generated by reload which we know will later be - replaced with proper base and index regs. */ - if (!strict - && reload_in_progress - && (REG_P (op0) || GET_CODE (op0) == PLUS) - && REG_P (op1)) - return true; - return (REG_P (op0) && REG_P (op1) && ((INT_REG_OK_FOR_BASE_P (op0, strict) && INT_REG_OK_FOR_INDEX_P (op1, strict)) @@ -10036,9 +9867,7 @@ rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, ret ? "true" : "false", GET_MODE_NAME (mode), reg_ok_strict, - (reload_completed - ? "after" - : (reload_in_progress ? "progress" : "before")), + (reload_completed ? "after" : "before"), GET_RTX_NAME (GET_CODE (x))); debug_rtx (x); @@ -10440,9 +10269,6 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) static void rs6000_eliminate_indexed_memrefs (rtx operands[2]) { - if (reload_in_progress) - return; - if (GET_CODE (operands[0]) == MEM && GET_CODE (XEXP (operands[0], 0)) != REG && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), @@ -10501,19 +10327,30 @@ rs6000_const_vec (machine_mode mode) return v; } -/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi - for a VSX load or store operation. */ -rtx -rs6000_gen_le_vsx_permute (rtx source, machine_mode mode) +/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or + store operation. */ +void +rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode) { - /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and - 128-bit integers if they are allowed in VSX registers. */ - if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode) - return gen_rtx_ROTATE (mode, source, GEN_INT (64)); + /* Scalar permutations are easier to express in integer modes rather than + floating-point modes, so cast them here. We use V1TImode instead + of TImode to ensure that the values don't go through GPRs. */ + if (FLOAT128_VECTOR_P (mode)) + { + dest = gen_lowpart (V1TImode, dest); + source = gen_lowpart (V1TImode, source); + mode = V1TImode; + } + + /* Use ROTATE instead of VEC_SELECT if the mode contains only a single + scalar. */ + if (mode == TImode || mode == V1TImode) + emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source, + GEN_INT (64)))); else { rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); - return gen_rtx_VEC_SELECT (mode, source, par); + emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par))); } } @@ -10523,8 +10360,6 @@ rs6000_gen_le_vsx_permute (rtx source, machine_mode mode) void rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) { - rtx tmp, permute_mem, permute_reg; - /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, V1TImode). */ if (mode == TImode || mode == V1TImode) @@ -10534,11 +10369,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) source = adjust_address (source, V2DImode, 0); } - tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; - permute_mem = rs6000_gen_le_vsx_permute (source, mode); - permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); - emit_insn (gen_rtx_SET (tmp, permute_mem)); - emit_insn (gen_rtx_SET (dest, permute_reg)); + rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; + rs6000_emit_le_vsx_permute (tmp, source, mode); + rs6000_emit_le_vsx_permute (dest, tmp, mode); } /* Emit a little-endian store to vector memory location DEST from VSX @@ -10547,12 +10380,10 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) void rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) { - rtx tmp, permute_src, permute_tmp; - - /* This should never be called during or after reload, because it does + /* This should never be called during or after LRA, because it does not re-permute the source register. It is intended only for use during expand. */ - gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); + gcc_assert (!lra_in_progress && !reload_completed); /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, V1TImode). */ @@ -10563,11 +10394,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) source = gen_lowpart (V2DImode, source); } - tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; - permute_src = rs6000_gen_le_vsx_permute (source, mode); - permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); - emit_insn (gen_rtx_SET (tmp, permute_src)); - emit_insn (gen_rtx_SET (dest, permute_tmp)); + rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; + rs6000_emit_le_vsx_permute (tmp, source, mode); + rs6000_emit_le_vsx_permute (dest, tmp, mode); } /* Emit a sequence representing a little-endian VSX load or store, @@ -10646,8 +10475,7 @@ valid_sf_si_move (rtx dest, rtx src, machine_mode mode) static bool rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) { - if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed - && !lra_in_progress + if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) && SUBREG_P (source) && sf_subreg_operand (source, mode)) { @@ -10681,10 +10509,10 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) if (TARGET_DEBUG_ADDR) { fprintf (stderr, - "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, " + "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, " "reload_completed = %d, can_create_pseudos = %d.\ndest:\n", GET_MODE_NAME (mode), - reload_in_progress, + lra_in_progress, reload_completed, can_create_pseudo_p ()); debug_rtx (dest); @@ -10758,12 +10586,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) operands[1] = tmp; } - /* Handle the case where reload calls us with an invalid address. */ - if (reload_in_progress && mode == Pmode - && (! general_operand (operands[1], mode) - || ! nonimmediate_operand (operands[0], mode))) - goto emit_set; - /* 128-bit constant floating-point values on Darwin should really be loaded as two parts. However, this premature splitting is a problem when DFmode values can go into Altivec registers. */ @@ -10781,11 +10603,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) return; } - if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX) - cfun->machine->sdmode_stack_slot = - eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); - - /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), p1:SD) if p1 is not of floating point class and p0 is spilled as we can have no analogous movsd_store for this. */ @@ -10895,57 +10712,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) return; } - if (reload_in_progress - && mode == SDmode - && cfun->machine->sdmode_stack_slot != NULL_RTX - && MEM_P (operands[0]) - && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot) - && REG_P (operands[1])) - { - if (FP_REGNO_P (REGNO (operands[1]))) - { - rtx mem = adjust_address_nv (operands[0], DDmode, 0); - mem = eliminate_regs (mem, VOIDmode, NULL_RTX); - emit_insn (gen_movsd_store (mem, operands[1])); - } - else if (INT_REGNO_P (REGNO (operands[1]))) - { - rtx mem = operands[0]; - if (BYTES_BIG_ENDIAN) - mem = adjust_address_nv (mem, mode, 4); - mem = eliminate_regs (mem, VOIDmode, NULL_RTX); - emit_insn (gen_movsd_hardfloat (mem, operands[1])); - } - else - gcc_unreachable(); - return; - } - if (reload_in_progress - && mode == SDmode - && REG_P (operands[0]) - && MEM_P (operands[1]) - && cfun->machine->sdmode_stack_slot != NULL_RTX - && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot)) - { - if (FP_REGNO_P (REGNO (operands[0]))) - { - rtx mem = adjust_address_nv (operands[1], DDmode, 0); - mem = eliminate_regs (mem, VOIDmode, NULL_RTX); - emit_insn (gen_movsd_load (operands[0], mem)); - } - else if (INT_REGNO_P (REGNO (operands[0]))) - { - rtx mem = operands[1]; - if (BYTES_BIG_ENDIAN) - mem = adjust_address_nv (mem, mode, 4); - mem = eliminate_regs (mem, VOIDmode, NULL_RTX); - emit_insn (gen_movsd_hardfloat (operands[0], mem)); - } - else - gcc_unreachable(); - return; - } - /* FIXME: In the long term, this switch statement should go away and be replaced by a sequence of tests based on things like mode == Pmode. */ @@ -11104,10 +10870,9 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) /* If we are to limit the number of things we put in the TOC and this is a symbol plus a constant we can add in one insn, - just put the symbol in the TOC and add the constant. Don't do - this if reload is in progress. */ + just put the symbol in the TOC and add the constant. */ if (GET_CODE (operands[1]) == CONST - && TARGET_NO_SUM_IN_TOC && ! reload_in_progress + && TARGET_NO_SUM_IN_TOC && GET_CODE (XEXP (operands[1], 0)) == PLUS && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF @@ -11153,10 +10918,9 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) /* Above, we may have called force_const_mem which may have returned an invalid address. If we can, fix this up; otherwise, reload will have to deal with it. */ - if (GET_CODE (operands[1]) == MEM && ! reload_in_progress) + if (GET_CODE (operands[1]) == MEM) operands[1] = validize_mem (operands[1]); - emit_set: emit_insn (gen_rtx_SET (operands[0], operands[1])); } @@ -14692,6 +14456,58 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) } static rtx +altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk) +{ + rtx pat, addr; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode mode0 = Pmode; + machine_mode mode1 = Pmode; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op1 = copy_to_mode_reg (mode1, op1); + + if (op0 == const0_rtx) + addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); + else + { + op0 = copy_to_mode_reg (mode0, op0); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, + gen_rtx_PLUS (Pmode, op1, op0)); + } + + pat = GEN_FCN (icode) (target, addr); + if (!pat) + return 0; + + emit_insn (pat); + /* Reverse element order of elements if in LE mode */ + if (!VECTOR_ELT_ORDER_BIG) + { + rtx sel = swap_selector_for_mode (tmode); + rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (target, vperm)); + } + return target; +} + +static rtx paired_expand_stv_builtin (enum insn_code icode, tree exp) { tree arg0 = CALL_EXPR_ARG (exp, 0); @@ -16083,6 +15899,50 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) /* Fall through. */ } + /* XL_BE We initialized them to always load in big endian order. */ + switch (fcode) + { + case VSX_BUILTIN_XL_BE_V2DI: + { + enum insn_code code = CODE_FOR_vsx_load_v2di; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + case VSX_BUILTIN_XL_BE_V4SI: + { + enum insn_code code = CODE_FOR_vsx_load_v4si; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + case VSX_BUILTIN_XL_BE_V8HI: + { + enum insn_code code = CODE_FOR_vsx_load_v8hi; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + case VSX_BUILTIN_XL_BE_V16QI: + { + enum insn_code code = CODE_FOR_vsx_load_v16qi; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + case VSX_BUILTIN_XL_BE_V2DF: + { + enum insn_code code = CODE_FOR_vsx_load_v2df; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + case VSX_BUILTIN_XL_BE_V4SF: + { + enum insn_code code = CODE_FOR_vsx_load_v4sf; + return altivec_expand_xl_be_builtin (code, exp, target, false); + } + break; + default: + break; + /* Fall through. */ + } + *expandedp = false; return NULL_RTX; } @@ -16198,51 +16058,51 @@ paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) static void rs6000_invalid_builtin (enum rs6000_builtins fncode) { - size_t uns_fncode = (size_t)fncode; + size_t uns_fncode = (size_t) fncode; const char *name = rs6000_builtin_info[uns_fncode].name; HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask; gcc_assert (name != NULL); if ((fnmask & RS6000_BTM_CELL) != 0) - error ("Builtin function %s is only valid for the cell processor", name); + error ("builtin function %s is only valid for the cell processor", name); else if ((fnmask & RS6000_BTM_VSX) != 0) - error ("Builtin function %s requires the -mvsx option", name); + error ("builtin function %s requires the -mvsx option", name); else if ((fnmask & RS6000_BTM_HTM) != 0) - error ("Builtin function %s requires the -mhtm option", name); + error ("builtin function %s requires the -mhtm option", name); else if ((fnmask & RS6000_BTM_ALTIVEC) != 0) - error ("Builtin function %s requires the -maltivec option", name); + error ("builtin function %s requires the -maltivec option", name); else if ((fnmask & RS6000_BTM_PAIRED) != 0) - error ("Builtin function %s requires the -mpaired option", name); + error ("builtin function %s requires the -mpaired option", name); else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) - error ("Builtin function %s requires the -mhard-dfp and" + error ("builtin function %s requires the -mhard-dfp and" " -mpower8-vector options", name); else if ((fnmask & RS6000_BTM_DFP) != 0) - error ("Builtin function %s requires the -mhard-dfp option", name); + error ("builtin function %s requires the -mhard-dfp option", name); else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) - error ("Builtin function %s requires the -mpower8-vector option", name); + error ("builtin function %s requires the -mpower8-vector option", name); else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) - error ("Builtin function %s requires the -mcpu=power9 and" + error ("builtin function %s requires the -mcpu=power9 and" " -m64 options", name); else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0) - error ("Builtin function %s requires the -mcpu=power9 option", name); + error ("builtin function %s requires the -mcpu=power9 option", name); else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) - error ("Builtin function %s requires the -mcpu=power9 and" + error ("builtin function %s requires the -mcpu=power9 and" " -m64 options", name); else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC) - error ("Builtin function %s requires the -mcpu=power9 option", name); + error ("builtin function %s requires the -mcpu=power9 option", name); else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) - error ("Builtin function %s requires the -mhard-float and" + error ("builtin function %s requires the -mhard-float and" " -mlong-double-128 options", name); else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) - error ("Builtin function %s requires the -mhard-float option", name); + error ("builtin function %s requires the -mhard-float option", name); else if ((fnmask & RS6000_BTM_FLOAT128) != 0) - error ("Builtin function %s requires the -mfloat128 option", name); + error ("builtin function %s requires the -mfloat128 option", name); else - error ("Builtin function %s is not supported with the current options", + error ("builtin function %s is not supported with the current options", name); } @@ -16303,6 +16163,20 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); tree arg0, arg1, lhs; + size_t uns_fncode = (size_t) fn_code; + enum insn_code icode = rs6000_builtin_info[uns_fncode].icode; + const char *fn_name1 = rs6000_builtin_info[uns_fncode].name; + const char *fn_name2 = (icode != CODE_FOR_nothing) + ? get_insn_name ((int) icode) + : "nothing"; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n", + fn_code, fn_name1, fn_name2); + + if (!rs6000_fold_gimple) + return false; + /* Generic solution to prevent gimple folding of code without a LHS. */ if (!gimple_call_lhs (stmt)) return false; @@ -16662,6 +16536,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) return true; } default: + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", + fn_code, fn_name1, fn_name2); break; } @@ -16694,9 +16571,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, { enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; const char *name1 = rs6000_builtin_info[uns_fcode].name; - const char *name2 = ((icode != CODE_FOR_nothing) - ? get_insn_name ((int)icode) - : "nothing"); + const char *name2 = (icode != CODE_FOR_nothing) + ? get_insn_name ((int) icode) + : "nothing"; const char *name3; switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK) @@ -16715,7 +16592,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, fprintf (stderr, "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n", (name1) ? name1 : "---", fcode, - (name2) ? name2 : "---", (int)icode, + (name2) ? name2 : "---", (int) icode, name3, func_valid_p ? "" : ", not valid"); } @@ -17543,6 +17420,19 @@ altivec_init_builtins (void) def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V4SI); + def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V8HI); + def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V4SI); + def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V2DI); + def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V4SF); + def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V2DF); + def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid, + VSX_BUILTIN_XL_BE_V16QI); + if (TARGET_P9_VECTOR) { def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, @@ -17572,6 +17462,8 @@ altivec_init_builtins (void) VSX_BUILTIN_VEC_ST); def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid, VSX_BUILTIN_VEC_XL); + def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid, + VSX_BUILTIN_VEC_XL_BE); def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, VSX_BUILTIN_VEC_XST); @@ -18065,8 +17957,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, /* unsigned 2 argument functions. */ case ALTIVEC_BUILTIN_VMULEUB: case ALTIVEC_BUILTIN_VMULEUH: + case ALTIVEC_BUILTIN_VMULEUW: case ALTIVEC_BUILTIN_VMULOUB: case ALTIVEC_BUILTIN_VMULOUH: + case ALTIVEC_BUILTIN_VMULOUW: case CRYPTO_BUILTIN_VCIPHER: case CRYPTO_BUILTIN_VCIPHERLAST: case CRYPTO_BUILTIN_VNCIPHER: @@ -19347,42 +19241,6 @@ mems_ok_for_quad_peep (rtx mem1, rtx mem2) return 1; } - -rtx -rs6000_secondary_memory_needed_rtx (machine_mode mode) -{ - static bool eliminated = false; - rtx ret; - - if (mode != SDmode || TARGET_NO_SDMODE_STACK) - ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); - else - { - rtx mem = cfun->machine->sdmode_stack_slot; - gcc_assert (mem != NULL_RTX); - - if (!eliminated) - { - mem = eliminate_regs (mem, VOIDmode, NULL_RTX); - cfun->machine->sdmode_stack_slot = mem; - eliminated = true; - } - ret = mem; - } - - if (TARGET_DEBUG_ADDR) - { - fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n", - GET_MODE_NAME (mode)); - if (!ret) - fprintf (stderr, "\tNULL_RTX\n"); - else - debug_rtx (ret); - } - - return ret; -} - /* Return the mode to be used for memory when a secondary memory location is needed. For SDmode values we need to use DDmode, in all other cases we can use the same mode. */ @@ -19394,36 +19252,6 @@ rs6000_secondary_memory_needed_mode (machine_mode mode) return mode; } -static tree -rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) -{ - /* Don't walk into types. */ - if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp)) - { - *walk_subtrees = 0; - return NULL_TREE; - } - - switch (TREE_CODE (*tp)) - { - case VAR_DECL: - case PARM_DECL: - case FIELD_DECL: - case RESULT_DECL: - case SSA_NAME: - case REAL_CST: - case MEM_REF: - case VIEW_CONVERT_EXPR: - if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode) - return *tp; - break; - default: - break; - } - - return NULL_TREE; -} - /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work on traditional floating point registers, and the VMRGOW/VMRGEW instructions only work on the traditional altivec registers, note if an altivec register @@ -19444,7 +19272,7 @@ register_to_reg_type (rtx reg, bool *is_altivec) regno = REGNO (reg); if (regno >= FIRST_PSEUDO_REGISTER) { - if (!lra_in_progress && !reload_in_progress && !reload_completed) + if (!lra_in_progress && !reload_completed) return PSEUDO_REG_TYPE; regno = true_regnum (reg); @@ -19817,7 +19645,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, } /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) { if (mode == SImode) return true; @@ -20521,64 +20349,6 @@ rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p) return; } -/* Allocate a 64-bit stack slot to be used for copying SDmode values through if - this function has any SDmode references. If we are on a power7 or later, we - don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions - can load/store the value. */ - -static void -rs6000_alloc_sdmode_stack_slot (void) -{ - tree t; - basic_block bb; - gimple_stmt_iterator gsi; - - gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX); - /* We use a different approach for dealing with the secondary - memory in LRA. */ - if (ira_use_lra_p) - return; - - if (TARGET_NO_SDMODE_STACK) - return; - - FOR_EACH_BB_FN (bb, cfun) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL); - if (ret) - { - rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); - cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, - SDmode, 0); - return; - } - } - - /* Check for any SDmode parameters of the function. */ - for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t)) - { - if (TREE_TYPE (t) == error_mark_node) - continue; - - if (TYPE_MODE (TREE_TYPE (t)) == SDmode - || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode) - { - rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0); - cfun->machine->sdmode_stack_slot = adjust_address_nv (stack, - SDmode, 0); - return; - } - } -} - -static void -rs6000_instantiate_decls (void) -{ - if (cfun->machine->sdmode_stack_slot != NULL_RTX) - instantiate_decl_rtl (cfun->machine->sdmode_stack_slot); -} - /* Given an rtx X being reloaded into a reg required to be in class CLASS, return the class of reg to actually use. In general this is just CLASS; but on some machines @@ -20651,7 +20421,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and a sign extend in the Altivec registers. */ if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR - && TARGET_VSX_SMALL_INTEGER && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) return ALTIVEC_REGS; } @@ -23607,10 +23376,9 @@ static rtx rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) { rtx addr = XEXP (mem, 0); - int strict_p = (reload_in_progress || reload_completed); - if (!legitimate_indirect_address_p (addr, strict_p) - && !legitimate_indexed_address_p (addr, strict_p)) + if (!legitimate_indirect_address_p (addr, reload_completed) + && !legitimate_indexed_address_p (addr, reload_completed)) { addr = force_reg (Pmode, addr); mem = replace_equiv_address_nv (mem, addr); @@ -24684,6 +24452,21 @@ rs6000_savres_strategy (rs6000_stack_t *info, else if (!lr_save_p && info->first_gp_reg_save > 29) strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + /* We can only use save multiple if we need to save all the registers from + first_gp_reg_save. Otherwise, the CFI gets messed up (we save some + register we do not restore). */ + if (strategy & SAVE_MULTIPLE) + { + int i; + + for (i = info->first_gp_reg_save; i < 32; i++) + if (fixed_reg_p (i) || !save_reg_p (i)) + { + strategy &= ~SAVE_MULTIPLE; + break; + } + } + /* We can only use load multiple or the out-of-line routines to restore gprs if we've saved all the registers from first_gp_reg_save. Otherwise, we risk loading garbage. @@ -32525,7 +32308,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) rtx fnmem, fn_reg, toc_reg; if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) - error ("You cannot take the address of a nested function if you use " + error ("you cannot take the address of a nested function if you use " "the -mno-pointers-to-nested-functions option."); fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); @@ -33337,7 +33120,7 @@ rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, { rtx base, offset; - if (reg == NULL && ! reload_in_progress && ! reload_completed) + if (reg == NULL && !reload_completed) reg = gen_reg_rtx (Pmode); if (GET_CODE (orig) == CONST) @@ -33363,7 +33146,7 @@ rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, { if (SMALL_INT (offset)) return plus_constant (Pmode, base, INTVAL (offset)); - else if (! reload_in_progress && ! reload_completed) + else if (!reload_completed) offset = force_reg (Pmode, offset); else { @@ -35999,14 +35782,6 @@ rs6000_libcall_value (machine_mode mode) return gen_rtx_REG (mode, regno); } - -/* Return true if we use LRA instead of reload pass. */ -static bool -rs6000_lra_p (void) -{ - return TARGET_LRA; -} - /* Compute register pressure classes. We implement the target hook to avoid IRA picking something like NON_SPECIAL_REGS as a pressure class, which can lead to incorrect estimates of number of available registers and therefor @@ -36358,11 +36133,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "string", OPTION_MASK_STRING, false, true }, { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true }, { "update", OPTION_MASK_NO_UPDATE, true , true }, - { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true }, - { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true }, - { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true }, { "vsx", OPTION_MASK_VSX, false, true }, - { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT #if TARGET_AIX_OS @@ -37624,7 +37395,7 @@ rs6000_allocate_stack_temp (machine_mode mode, { rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); rtx addr = XEXP (stack, 0); - int strict_p = (reload_in_progress || reload_completed); + int strict_p = reload_completed; if (!legitimate_indirect_address_p (addr, strict_p)) { @@ -37646,13 +37417,12 @@ rs6000_allocate_stack_temp (machine_mode mode, rtx rs6000_address_for_fpconvert (rtx x) { - int strict_p = (reload_in_progress || reload_completed); rtx addr; gcc_assert (MEM_P (x)); addr = XEXP (x, 0); - if (! legitimate_indirect_address_p (addr, strict_p) - && ! legitimate_indexed_address_p (addr, strict_p)) + if (! legitimate_indirect_address_p (addr, reload_completed) + && ! legitimate_indexed_address_p (addr, reload_completed)) { if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) { @@ -37690,10 +37460,9 @@ rs6000_address_for_altivec (rtx x) if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x))) { rtx addr = XEXP (x, 0); - int strict_p = (reload_in_progress || reload_completed); - if (!legitimate_indexed_address_p (addr, strict_p) - && !legitimate_indirect_address_p (addr, strict_p)) + if (!legitimate_indexed_address_p (addr, reload_completed) + && !legitimate_indirect_address_p (addr, reload_completed)) addr = copy_to_mode_reg (Pmode, addr); addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 9b73be1e176..82a0bda48c6 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -600,8 +600,7 @@ extern int rs6000_vector_align[]; #define TARGET_DIRECT_MOVE_128 (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ && TARGET_POWERPC64) #define TARGET_VEXTRACTUB (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64) - + && TARGET_POWERPC64) /* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI). */ #define TARGET_NO_SF_SUBREG TARGET_DIRECT_MOVE_64BIT @@ -761,7 +760,6 @@ extern int rs6000_vector_align[]; #define TARGET_DIRECT_MOVE_64BIT (TARGET_DIRECT_MOVE \ && TARGET_P8_VECTOR \ && TARGET_POWERPC64 \ - && TARGET_UPPER_REGS_DI \ && (rs6000_altivec_element_order != 2)) /* Whether the various reciprocal divide/square root estimate instructions @@ -1585,13 +1583,6 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; #define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ rs6000_secondary_memory_needed_ptr (CLASS1, CLASS2, MODE) -/* For cpus that cannot load/store SDmode values from the 64-bit - FP registers without using a full 64-bit load/store, we need - to allocate a full 64-bit stack slot for them. */ - -#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \ - rs6000_secondary_memory_needed_rtx (MODE) - /* Specify the mode to be used for memory when a secondary memory location is needed. For cpus that cannot load/store SDmode values from the 64-bit FP registers without using a full 64-bit diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 2fd9ef0f168..6985b9f82da 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -691,7 +691,7 @@ ;; D-form load to FPR register & move to Altivec register ;; Move Altivec register to FPR register and store (define_mode_iterator ALTIVEC_DFORM [DF - SF + (SF "TARGET_P8_VECTOR") (DI "TARGET_POWERPC64")]) @@ -1004,8 +1004,7 @@ (define_split [(set (match_operand:DI 0 "altivec_register_operand") (sign_extend:DI (match_operand:SI 1 "altivec_register_operand")))] - "TARGET_VSX_SMALL_INTEGER && TARGET_P8_VECTOR && !TARGET_P9_VECTOR - && reload_completed" + "TARGET_P8_VECTOR && !TARGET_P9_VECTOR && reload_completed" [(const_int 0)] { rtx dest = operands[0]; @@ -5161,7 +5160,7 @@ operands[1] = rs6000_address_for_fpconvert (operands[1]); if (GET_CODE (operands[2]) == SCRATCH) operands[2] = gen_reg_rtx (DImode); - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) emit_insn (gen_extendsidi2 (operands[2], operands[1])); else emit_insn (gen_lfiwax (operands[2], operands[1])); @@ -5238,7 +5237,7 @@ operands[1] = rs6000_address_for_fpconvert (operands[1]); if (GET_CODE (operands[2]) == SCRATCH) operands[2] = gen_reg_rtx (DImode); - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) emit_insn (gen_zero_extendsidi2 (operands[2], operands[1])); else emit_insn (gen_lfiwzx (operands[2], operands[1])); @@ -5423,8 +5422,7 @@ (clobber (match_scratch:DI 2)) (clobber (match_scratch:DI 3)) (clobber (match_scratch:<QHI:MODE> 4))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" { if (MEM_P (operands[1])) operands[1] = rs6000_address_for_fpconvert (operands[1]); @@ -5437,8 +5435,7 @@ (clobber (match_scratch:DI 2 "=wK,wi,wK")) (clobber (match_scratch:DI 3 "=X,r,X")) (clobber (match_scratch:<QHI:MODE> 4 "=X,X,wK"))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 - && TARGET_UPPER_REGS_DI && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" "#" "&& reload_completed" [(const_int 0)] @@ -5477,8 +5474,7 @@ (match_operand:QHI 1 "input_operand" ""))) (clobber (match_scratch:DI 2 "")) (clobber (match_scratch:DI 3 ""))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" { if (MEM_P (operands[1])) operands[1] = rs6000_address_for_fpconvert (operands[1]); @@ -5490,8 +5486,7 @@ (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z"))) (clobber (match_scratch:DI 2 "=wK,wi,wJwK")) (clobber (match_scratch:DI 3 "=X,r,X"))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64 - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64" "#" "&& reload_completed" [(const_int 0)] @@ -5524,7 +5519,7 @@ "TARGET_HARD_FLOAT && <TARGET_FLOAT>" " { - if (!TARGET_VSX_SMALL_INTEGER) + if (!TARGET_P8_VECTOR) { rtx src = force_reg (<MODE>mode, operands[1]); @@ -5551,7 +5546,7 @@ "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT) && TARGET_STFIWX && can_create_pseudo_p () - && !TARGET_VSX_SMALL_INTEGER" + && !TARGET_P8_VECTOR" "#" "" [(pc)] @@ -5592,7 +5587,7 @@ (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>"))) (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d")) (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))] - "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_VSX_SMALL_INTEGER" + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_P8_VECTOR" "#" "" [(pc)] @@ -5629,8 +5624,7 @@ [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand") (fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) (clobber (match_scratch:DI 2))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT" { if (MEM_P (operands[0])) operands[0] = rs6000_address_for_fpconvert (operands[0]); @@ -5641,8 +5635,7 @@ (fix:QHI (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) (clobber (match_scratch:DI 2 "=X,wi"))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" [(const_int 0)] @@ -5672,7 +5665,7 @@ "TARGET_HARD_FLOAT && <TARGET_FLOAT> && TARGET_FCTIWUZ && TARGET_STFIWX" " { - if (!TARGET_VSX_SMALL_INTEGER) + if (!TARGET_P8_VECTOR) { emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1])); DONE; @@ -5685,7 +5678,7 @@ (clobber (match_scratch:DI 2 "=d"))] "TARGET_HARD_FLOAT && <TARGET_FLOAT> && TARGET_FCTIWUZ && TARGET_STFIWX && can_create_pseudo_p () - && !TARGET_VSX_SMALL_INTEGER" + && !TARGET_P8_VECTOR" "#" "" [(pc)] @@ -5734,8 +5727,7 @@ [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand") (unsigned_fix:QHI (match_operand:SFDF 1 "gpc_reg_operand"))) (clobber (match_scratch:DI 2))])] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT" { if (MEM_P (operands[0])) operands[0] = rs6000_address_for_fpconvert (operands[0]); @@ -5746,8 +5738,7 @@ (unsigned_fix:QHI (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>"))) (clobber (match_scratch:DI 2 "=X,wi"))] - "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT - && TARGET_VSX_SMALL_INTEGER" + "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT" "#" "&& reload_completed" [(const_int 0)] @@ -5777,7 +5768,7 @@ (define_insn "*fctiw<u>z_<mode>_smallint" [(set (match_operand:SI 0 "vsx_register_operand" "=d,wi") (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))] - "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_VSX_SMALL_INTEGER" + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_P8_VECTOR" "@ fctiw<u>z %0,%1 xscvdp<su>xws %x0,%x1" @@ -5789,7 +5780,7 @@ [(set (match_operand:SI 0 "memory_operand" "=Z") (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "wa"))) (clobber (match_scratch:SI 2 "=wa"))] - "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_VSX_SMALL_INTEGER" + "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_P8_VECTOR" "#" "&& reload_completed" [(set (match_dup 2) @@ -6681,7 +6672,7 @@ UNSPEC_MOVSI_GOT))] "DEFAULT_ABI == ABI_V4 && flag_pic == 1 - && (reload_in_progress || reload_completed)" + && reload_completed" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) (unspec:SI [(match_dup 1)(match_dup 0)] UNSPEC_MOVSI_GOT))] @@ -6959,7 +6950,7 @@ (define_split [(set (match_operand:DI 0 "altivec_register_operand") (match_operand:DI 1 "xxspltib_constant_split"))] - "TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR && reload_completed" + "TARGET_P9_VECTOR && reload_completed" [(const_int 0)] { rtx op0 = operands[0]; @@ -8234,7 +8225,7 @@ (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") (match_operand:P 2 "reg_or_cint_operand" "rI")) (const_int -16)))] - "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "TARGET_ALTIVEC && reload_completed" "#" "&& reload_completed" [(set (match_dup 0) @@ -8664,7 +8655,7 @@ (define_split [(set (match_operand:DI 0 "altivec_register_operand" "") (match_operand:DI 1 "s5bit_cint_operand" ""))] - "TARGET_UPPER_REGS_DI && TARGET_VSX && reload_completed" + "TARGET_VSX && reload_completed" [(const_int 0)] { rtx op0 = operands[0]; @@ -8686,7 +8677,7 @@ (define_split [(set (match_operand:INT_ISA3 0 "altivec_register_operand" "") (match_operand:INT_ISA3 1 "xxspltib_constant_split" ""))] - "TARGET_UPPER_REGS_DI && TARGET_P9_VECTOR && reload_completed" + "TARGET_P9_VECTOR && reload_completed" [(const_int 0)] { rtx op0 = operands[0]; @@ -9766,7 +9757,7 @@ (match_operand:DF 1 "any_operand" "")) (set (match_operand:DF 2 "gpc_reg_operand" "") (match_dup 0))] - "!TARGET_UPPER_REGS_DF + "!TARGET_VSX && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (match_dup 1))]) @@ -9775,7 +9766,7 @@ (match_operand:SF 1 "any_operand" "")) (set (match_operand:SF 2 "gpc_reg_operand" "") (match_dup 0))] - "!TARGET_UPPER_REGS_SF + "!TARGET_P8_VECTOR && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (match_dup 1))]) @@ -13974,8 +13965,7 @@ (match_operand:ALTIVEC_DFORM 2 "simple_offsettable_mem_operand")) (set (match_operand:ALTIVEC_DFORM 3 "altivec_register_operand") (match_dup 1))] - "TARGET_VSX && TARGET_UPPER_REGS_<MODE> && !TARGET_P9_DFORM_SCALAR - && peep2_reg_dead_p (2, operands[1])" + "TARGET_VSX && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])" [(set (match_dup 0) (match_dup 4)) (set (match_dup 3) @@ -14011,8 +14001,7 @@ (match_operand:ALTIVEC_DFORM 2 "altivec_register_operand")) (set (match_operand:ALTIVEC_DFORM 3 "simple_offsettable_mem_operand") (match_dup 1))] - "TARGET_VSX && TARGET_UPPER_REGS_<MODE> && !TARGET_P9_DFORM_SCALAR - && peep2_reg_dead_p (2, operands[1])" + "TARGET_VSX && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])" [(set (match_dup 0) (match_dup 4)) (set (match_dup 5) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 28d899391fb..1ee84cb4dc5 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -148,6 +148,10 @@ maltivec=be Target Report RejectNegative Var(rs6000_altivec_element_order, 2) Generate AltiVec instructions using big-endian element order. +mfold-gimple +Target Report Var(rs6000_fold_gimple) Init(1) +Enable early gimple folding of builtins. + mhard-dfp Target Report Mask(DFP) Var(rs6000_isa_flags) Use decimal floating point instructions. @@ -200,9 +204,6 @@ mvsx-scalar-double Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1) ; If -mvsx, use VSX arithmetic instructions for DFmode (on by default) -mvsx-scalar-memory -Target Undocumented Report Alias(mupper-regs-df) - mvsx-align-128 Target Undocumented Report Var(TARGET_VSX_ALIGN_128) Save ; If -mvsx, set alignment to 128 bits instead of 32/64 @@ -433,9 +434,9 @@ mlong-double- Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save -mlong-double-<n> Specify size of long double (64 or 128 bits). +; This option existed in the past, but now is always on. mlra -Target Report Mask(LRA) Var(rs6000_isa_flags) -Enable Local Register Allocation. +Target RejectNegative Undocumented Ignore msched-costly-dep= Target RejectNegative Joined Var(rs6000_sched_costly_dep_str) @@ -549,22 +550,6 @@ mcompat-align-parm Target Report Var(rs6000_compat_align_parm) Init(0) Save Generate aggregate parameter passing code with at most 64-bit alignment. -mupper-regs-df -Target Report Mask(UPPER_REGS_DF) Var(rs6000_isa_flags) -Allow double variables in upper registers with -mcpu=power7 or -mvsx. - -mupper-regs-sf -Target Report Mask(UPPER_REGS_SF) Var(rs6000_isa_flags) -Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector. - -mupper-regs -Target Report Var(TARGET_UPPER_REGS) Init(-1) Save -Allow float/double variables in upper registers if cpu allows it. - -mupper-regs-di -Target Report Mask(UPPER_REGS_DI) Var(rs6000_isa_flags) -Allow 64-bit integer variables in upper registers with -mcpu=power7 or -mvsx. - moptimize-swaps Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save Analyze and remove doubleword swaps from VSX computations. @@ -625,10 +610,6 @@ mfloat128-convert Target Undocumented Mask(FLOAT128_CVT) Var(rs6000_isa_flags) Enable default conversions between __float128 & long double. -mvsx-small-integer -Target Report Mask(VSX_SMALL_INTEGER) Var(rs6000_isa_flags) -Enable small integers to be in VSX registers. - mstack-protector-guard= Target RejectNegative Joined Enum(stack_protector_guard) Var(rs6000_stack_protector_guard) Init(SSP_TLS) Use given stack-protector guard. diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h index 54a36de6eb4..8a62fdcbaf3 100644 --- a/gcc/config/rs6000/rtems.h +++ b/gcc/config/rs6000/rtems.h @@ -14,33 +14,172 @@ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -/* Specify predefined symbols in preprocessor. */ +/* Copy and paste from linux64.h and freebsd64.h */ +#ifdef IN_LIBGCC2 +#undef TARGET_64BIT +#ifdef __powerpc64__ +#define TARGET_64BIT 1 +#else +#define TARGET_64BIT 0 +#endif +#endif + +/* Copy and paste from linux64.h and freebsd64.h */ +#undef TARGET_AIX +#define TARGET_AIX TARGET_64BIT #undef TARGET_OS_CPP_BUILTINS -#define TARGET_OS_CPP_BUILTINS() \ - do \ - { \ - builtin_define_std ("PPC"); \ - builtin_define ("__rtems__"); \ - builtin_define ("__USE_INIT_FINI__"); \ - builtin_assert ("system=rtems"); \ - builtin_assert ("cpu=powerpc"); \ - builtin_assert ("machine=powerpc"); \ - TARGET_OS_SYSV_CPP_BUILTINS (); \ - } \ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__PPC64__"); \ + builtin_define ("__powerpc64__"); \ + builtin_assert ("cpu=powerpc64"); \ + builtin_assert ("machine=powerpc64"); \ + } \ + else \ + { \ + builtin_define_std ("PPC"); \ + builtin_define_std ("powerpc"); \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ + } \ + } \ + while (0) + +/* Copy and paste from linux64.h and freebsd64.h */ +#define INVALID_64BIT "-m%s not supported in this configuration" + +/* A lot of copy and paste from linux64.h and freebsd64.h */ +#undef SUBSUBTARGET_OVERRIDE_OPTIONS +#define SUBSUBTARGET_OVERRIDE_OPTIONS \ + do \ + { \ + if (rs6000_isa_flags & OPTION_MASK_64BIT) \ + { \ + rs6000_elf_abi = 2; \ + rs6000_current_abi = ABI_ELFv2; \ + if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ + error (INVALID_64BIT, "relocatable"); \ + } \ + if (rs6000_isa_flags & OPTION_MASK_EABI) \ + { \ + rs6000_isa_flags &= ~OPTION_MASK_EABI; \ + error (INVALID_64BIT, "eabi"); \ + } \ + if (TARGET_PROTOTYPE) \ + { \ + target_prototype = 0; \ + error (INVALID_64BIT, "prototype"); \ + } \ + if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ + { \ + rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ + error ("-m64 requires a PowerPC64 cpu"); \ + } \ + } \ + } \ while (0) #undef TARGET_LIBGCC_SDATA_SECTION #define TARGET_LIBGCC_SDATA_SECTION ".sdata" -#undef CPP_OS_DEFAULT_SPEC -#define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)" +/* Copy and paste from linux64.h and freebsd64.h */ +#undef SIZE_TYPE +#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int") + +/* Copy and paste from linux64.h and freebsd64.h */ +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") + +/* Copy and paste from freebsd64.h */ +#undef WCHAR_TYPE + +/* Copy and paste from freebsd64.h */ +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +/* Copy and paste from linux64.h and freebsd64.h */ +#ifdef __powerpc64__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" bl " #FUNC "\n" \ +" nop\n" \ +" .previous"); +#endif + +/* This could be also POWERPC_FREEBSD. It is related to the save/restore + defines below. */ +#define POWERPC_LINUX + +/* Copy and paste from linux64.h and freebsd64.h */ +#undef SAVE_FP_PREFIX +#define SAVE_FP_PREFIX (TARGET_64BIT ? "._savef" : "_savefpr_") +#undef SAVE_FP_SUFFIX +#define SAVE_FP_SUFFIX "" +#undef RESTORE_FP_PREFIX +#define RESTORE_FP_PREFIX (TARGET_64BIT ? "._restf" : "_restfpr_") +#undef RESTORE_FP_SUFFIX +#define RESTORE_FP_SUFFIX "" -#define CPP_OS_RTEMS_SPEC "\ +/* Copy and paste from linux64.h and freebsd64.h */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + { \ + fputs ("\t.size\t", (FILE)); \ + if (TARGET_64BIT && DOT_SYMBOLS) \ + putc ('.', (FILE)); \ + assemble_name ((FILE), (FNAME)); \ + fputs (",.-", (FILE)); \ + rs6000_output_function_entry (FILE, FNAME); \ + putc ('\n', (FILE)); \ + } \ + } \ + while (0) + +/* Copy and paste from linux64.h and freebsd64.h */ +#undef ASM_OUTPUT_SPECIAL_POOL_ENTRY_P +#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE) \ + (TARGET_TOC \ + && (GET_CODE (X) == SYMBOL_REF \ + || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF) \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST_INT \ + && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \ + || (GET_CODE (X) == CONST_DOUBLE \ + && ((TARGET_64BIT \ + && (TARGET_MINIMAL_TOC \ + || (SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && ! TARGET_NO_FP_IN_TOC))) \ + || (!TARGET_64BIT \ + && !TARGET_NO_FP_IN_TOC \ + && SCALAR_FLOAT_MODE_P (GET_MODE (X)) \ + && BITS_PER_WORD == HOST_BITS_PER_INT))))) + +#undef CPP_OS_DEFAULT_SPEC +#define CPP_OS_DEFAULT_SPEC "\ %{!mcpu*: %{!Dppc*: %{!Dmpc*: -Dmpc750} } }\ %{mcpu=403: %{!Dppc*: %{!Dmpc*: -Dppc403} } } \ %{mcpu=505: %{!Dppc*: %{!Dmpc*: -Dmpc505} } } \ @@ -55,6 +194,37 @@ %{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540} } } \ %{mcpu=e6500: -D__PPC_CPU_E6500__}" +#undef ASM_DEFAULT_SPEC +#define ASM_DEFAULT_SPEC "-mppc%{m64:64}" + +#undef ASM_SPEC +#define ASM_SPEC "%{!m64:%(asm_spec32)}%{m64:%(asm_spec64)} %(asm_spec_common)" + +#define ASM_SPEC32 "-a32 \ +%{mrelocatable} %{mrelocatable-lib} %{" FPIE_OR_FPIC_SPEC ":-K PIC} \ +%{memb|msdata=eabi: -memb}" + +#define ASM_SPEC64 "-a64" + +#define ASM_SPEC_COMMON "%(asm_cpu) \ +%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}}" \ + ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) + +#undef LINK_OS_DEFAULT_SPEC +#define LINK_OS_DEFAULT_SPEC \ +"%{!m64:%(link_os_spec32)}%{m64:%(link_os_spec64)}" + +#define LINK_OS_SPEC32 ENDIAN_SELECT(" -m elf32ppc", \ + " -m elf32lppc", \ + " -m elf32ppc") +#define LINK_OS_SPEC64 ENDIAN_SELECT(" -m elf64ppc", \ + " -m elf64lppc", \ + " -m elf64ppc") + #undef SUBSUBTARGET_EXTRA_SPECS #define SUBSUBTARGET_EXTRA_SPECS \ - { "cpp_os_rtems", CPP_OS_RTEMS_SPEC } + { "asm_spec_common", ASM_SPEC_COMMON }, \ + { "asm_spec32", ASM_SPEC32 }, \ + { "asm_spec64", ASM_SPEC64 }, \ + { "link_os_spec32", LINK_OS_SPEC32 }, \ + { "link_os_spec64", LINK_OS_SPEC64 }, diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h index de386291a51..cbee89140dd 100644 --- a/gcc/config/rs6000/sysv4.h +++ b/gcc/config/rs6000/sysv4.h @@ -757,24 +757,34 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN) #define CRTOFFLOADEND "" #endif -#ifdef HAVE_LD_PIE -#define STARTFILE_LINUX_SPEC "\ -%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \ -%{mnewlib:ecrti.o%s;:crti.o%s} \ -%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ -" CRTOFFLOADBEGIN -#else -#define STARTFILE_LINUX_SPEC "\ -%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \ -%{mnewlib:ecrti.o%s;:crti.o%s} \ -%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \ -" CRTOFFLOADBEGIN -#endif - -#define ENDFILE_LINUX_SPEC "\ -%{shared|pie:crtendS.o%s;:crtend.o%s} \ -%{mnewlib:ecrtn.o%s;:crtn.o%s} \ -" CRTOFFLOADEND +/* STARTFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_STARTFILE_SPEC + but with the mnewlib ecrti.o%s selection substituted for crti.o%s. */ +#define STARTFILE_LINUX_SPEC \ + "%{shared:; \ + pg|p|profile:gcrt1.o%s; \ + static:crt1.o%s; \ + " PIE_SPEC ":Scrt1.o%s; \ + :crt1.o%s} \ + %{mnewlib:ecrti.o%s;:crti.o%s} \ + %{static:crtbeginT.o%s; \ + shared|" PIE_SPEC ":crtbeginS.o%s; \ + :crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s} \ + " CRTOFFLOADBEGIN + +/* ENDFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_ENDFILE_SPEC + but with the mnewlib ecrtn.o%s selection substituted for crtn.o%s. */ +#define ENDFILE_LINUX_SPEC \ + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{static:crtend.o%s; \ + shared|" PIE_SPEC ":crtendS.o%s; \ + :crtend.o%s} \ + %{mnewlib:ecrtn.o%s;:crtn.o%s} \ + " CRTOFFLOADEND #define LINK_START_LINUX_SPEC "" diff --git a/gcc/config/rs6000/t-rtems b/gcc/config/rs6000/t-rtems index 8290f5c5bdd..0e39c6320c8 100644 --- a/gcc/config/rs6000/t-rtems +++ b/gcc/config/rs6000/t-rtems @@ -27,8 +27,8 @@ MULTILIB_REQUIRED = MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500 MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500 -MULTILIB_OPTIONS += m32 -MULTILIB_DIRNAMES += m32 +MULTILIB_OPTIONS += m32/m64 +MULTILIB_DIRNAMES += m32 m64 MULTILIB_OPTIONS += msoft-float MULTILIB_DIRNAMES += nof @@ -72,3 +72,5 @@ MULTILIB_REQUIRED += mcpu=8540/msoft-float MULTILIB_REQUIRED += mcpu=860 MULTILIB_REQUIRED += mcpu=e6500/m32 MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec +MULTILIB_REQUIRED += mcpu=e6500/m64 +MULTILIB_REQUIRED += mcpu=e6500/m64/msoft-float/mno-altivec diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index a3d53e7f439..d6f2fd13fcb 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -74,6 +74,16 @@ (V1TI "TI") (TI "TI")]) +;; As above, but in lower case +(define_mode_attr VEC_base_l [(V16QI "qi") + (V8HI "hi") + (V4SI "si") + (V2DI "di") + (V4SF "sf") + (V2DF "df") + (V1TI "ti") + (TI "ti")]) + ;; Same size integer type for floating point data (define_mode_attr VEC_int [(V4SF "v4si") (V2DF "v2di")]) @@ -1016,7 +1026,7 @@ ;; Vector initialization, set, extract -(define_expand "vec_init<mode>" +(define_expand "vec_init<mode><VEC_base_l>" [(match_operand:VEC_E 0 "vlogical_operand" "") (match_operand:VEC_E 1 "" "")] "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)" @@ -1035,7 +1045,7 @@ DONE; }) -(define_expand "vec_extract<mode>" +(define_expand "vec_extract<mode><VEC_base_l>" [(match_operand:<VEC_base> 0 "register_operand" "") (match_operand:VEC_E 1 "vlogical_operand" "") (match_operand 2 "const_int_operand" "")] diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index ff65caa35dc..510294d97eb 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -37,6 +37,9 @@ (TI "TARGET_VSX_TIMODE") V1TI]) +;; Iterator for 128-bit integer types that go in a single vector register. +(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI]) + ;; Iterator for the 2 32-bit vector types (define_mode_iterator VSX_W [V4SF V4SI]) @@ -326,6 +329,7 @@ UNSPEC_VSX_CVDPSXWS UNSPEC_VSX_CVDPUXWS UNSPEC_VSX_CVSPDP + UNSPEC_VSX_CVHPSP UNSPEC_VSX_CVSPDPN UNSPEC_VSX_CVDPSPN UNSPEC_VSX_CVSXWDP @@ -348,6 +352,8 @@ UNSPEC_VSX_ROUND_I UNSPEC_VSX_ROUND_IC UNSPEC_VSX_SLDWI + UNSPEC_VSX_XXPERM + UNSPEC_VSX_XXSPLTW UNSPEC_VSX_XXSPLTD UNSPEC_VSX_DIVSD @@ -368,6 +374,8 @@ UNSPEC_VSX_SIEXPQP UNSPEC_VSX_SCMPEXPDP UNSPEC_VSX_STSTDC + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL UNSPEC_VSX_VXEXP UNSPEC_VSX_VXSIG UNSPEC_VSX_VIEXP @@ -751,9 +759,9 @@ ;; special V1TI container class, which it is not appropriate to use vec_select ;; for the type. (define_insn "*vsx_le_permute_<mode>" - [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z") - (rotate:VSX_LE_128 - (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>") + [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z") + (rotate:VSX_TI + (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>") (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "@ @@ -764,10 +772,10 @@ (set_attr "type" "vecperm,vecload,vecstore")]) (define_insn_and_split "*vsx_le_undo_permute_<mode>" - [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>") - (rotate:VSX_LE_128 - (rotate:VSX_LE_128 - (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>") + [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>") + (rotate:VSX_TI + (rotate:VSX_TI + (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>") (const_int 64)) (const_int 64)))] "!BYTES_BIG_ENDIAN && TARGET_VSX" @@ -792,16 +800,15 @@ "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" "#" "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" - [(set (match_dup 2) - (rotate:VSX_LE_128 (match_dup 1) - (const_int 64))) - (set (match_dup 0) - (rotate:VSX_LE_128 (match_dup 2) - (const_int 64)))] + [(const_int 0)] " { - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) - : operands[0]; + rtx tmp = (can_create_pseudo_p () + ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]); + rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); + rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); + DONE; } " [(set_attr "type" "vecload") @@ -819,15 +826,14 @@ [(set (match_operand:VSX_LE_128 0 "memory_operand" "") (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR" - [(set (match_dup 2) - (rotate:VSX_LE_128 (match_dup 1) - (const_int 64))) - (set (match_dup 0) - (rotate:VSX_LE_128 (match_dup 2) - (const_int 64)))] + [(const_int 0)] { - operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) - : operands[0]; + rtx tmp = (can_create_pseudo_p () + ? gen_reg_rtx_and_attrs (operands[0]) + : operands[0]); + rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); + rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); + DONE; }) ;; Peephole to catch memory to memory transfers for TImode if TImode landed in @@ -851,16 +857,13 @@ [(set (match_operand:VSX_LE_128 0 "memory_operand" "") (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))] "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR" - [(set (match_dup 1) - (rotate:VSX_LE_128 (match_dup 1) - (const_int 64))) - (set (match_dup 0) - (rotate:VSX_LE_128 (match_dup 1) - (const_int 64))) - (set (match_dup 1) - (rotate:VSX_LE_128 (match_dup 1) - (const_int 64)))] - "") + [(const_int 0)] +{ + rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); + rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode); + rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); + DONE; +}) ;; Vector constants that can be generated with XXSPLTIB that was added in ISA ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized. @@ -1198,7 +1201,7 @@ UNSPEC_VSX_MULSD))] "VECTOR_MEM_VSX_P (V2DImode)" "#" - "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" [(const_int 0)] " { @@ -1236,7 +1239,7 @@ UNSPEC_VSX_DIVSD))] "VECTOR_MEM_VSX_P (V2DImode)" "#" - "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" [(const_int 0)] " { @@ -1264,7 +1267,7 @@ UNSPEC_VSX_DIVUD))] "VECTOR_MEM_VSX_P (V2DImode)" "#" - "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress" + "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" [(const_int 0)] " { @@ -1746,6 +1749,15 @@ "xscvspdp %x0,%x1" [(set_attr "type" "fp")]) +;; Generate xvcvhpsp instruction +(define_insn "vsx_xvcvhpsp" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVHPSP))] + "TARGET_P9_VECTOR" + "xvcvhpsp %x0,%x1" + [(set_attr "type" "vecfloat")]) + ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF ;; format of scalars is actually DF. (define_insn "vsx_xscvdpsp_scalar" @@ -2352,10 +2364,10 @@ ;; Build a V2DF/V2DI vector from two scalars (define_insn "vsx_concat_<mode>" - [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") (vec_concat:VSX_D - (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b") - (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))] + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b") + (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)" { if (which_alternative == 0) @@ -2373,6 +2385,80 @@ } [(set_attr "type" "vecperm")]) +;; Combiner patterns to allow creating XXPERMDI's to access either double +;; word element in a vector register. +(define_insn "*vsx_concat_<mode>_1" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) + (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + HOST_WIDE_INT dword = INTVAL (operands[2]); + if (BYTES_BIG_ENDIAN) + { + operands[4] = GEN_INT (2*dword); + return "xxpermdi %x0,%x1,%x3,%4"; + } + else + { + operands[4] = GEN_INT (!dword); + return "xxpermdi %x0,%x3,%x1,%4"; + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_concat_<mode>_2" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa") + (vec_select:<VS_scalar> + (match_operand:VSX_D 2 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + HOST_WIDE_INT dword = INTVAL (operands[3]); + if (BYTES_BIG_ENDIAN) + { + operands[4] = GEN_INT (dword); + return "xxpermdi %x0,%x1,%x2,%4"; + } + else + { + operands[4] = GEN_INT (2 * !dword); + return "xxpermdi %x0,%x2,%x1,%4"; + } +} + [(set_attr "type" "vecperm")]) + +(define_insn "*vsx_concat_<mode>_3" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_concat:VSX_D + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) + (vec_select:<VS_scalar> + (match_operand:VSX_D 3 "gpc_reg_operand" "wa") + (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + HOST_WIDE_INT dword1 = INTVAL (operands[2]); + HOST_WIDE_INT dword2 = INTVAL (operands[4]); + if (BYTES_BIG_ENDIAN) + { + operands[5] = GEN_INT ((2 * dword1) + dword2); + return "xxpermdi %x0,%x1,%x3,%5"; + } + else + { + operands[5] = GEN_INT ((2 * !dword2) + !dword1); + return "xxpermdi %x0,%x3,%x1,%5"; + } +} + [(set_attr "type" "vecperm")]) + ;; Special purpose concat using xxpermdi to glue two single precision values ;; together, relying on the fact that internally scalar floats are represented ;; as doubles. This is used to initialize a V4SF vector with 4 floats @@ -2573,25 +2659,35 @@ DONE; }) -;; Set the element of a V2DI/VD2F mode -(define_insn "vsx_set_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>") - (unspec:VSX_D - [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>") - (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>") - (match_operand:QI 3 "u5bit_cint_operand" "i,i")] - UNSPEC_VSX_SET))] +;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT +(define_expand "vsx_set_<mode>" + [(use (match_operand:VSX_D 0 "vsx_register_operand")) + (use (match_operand:VSX_D 1 "vsx_register_operand")) + (use (match_operand:<VS_scalar> 2 "gpc_reg_operand")) + (use (match_operand:QI 3 "const_0_to_1_operand"))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - int idx_first = BYTES_BIG_ENDIAN ? 0 : 1; - if (INTVAL (operands[3]) == idx_first) - return \"xxpermdi %x0,%x2,%x1,1\"; - else if (INTVAL (operands[3]) == 1 - idx_first) - return \"xxpermdi %x0,%x1,%x2,0\"; + rtx dest = operands[0]; + rtx vec_reg = operands[1]; + rtx value = operands[2]; + rtx ele = operands[3]; + rtx tmp = gen_reg_rtx (<VS_scalar>mode); + + if (ele == const0_rtx) + { + emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx)); + emit_insn (gen_vsx_concat_<mode> (dest, value, tmp)); + DONE; + } + else if (ele == const1_rtx) + { + emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx)); + emit_insn (gen_vsx_concat_<mode> (dest, tmp, value)); + DONE; + } else gcc_unreachable (); -} - [(set_attr "type" "vecperm")]) +}) ;; Extract a DF/DI element from V2DF/V2DI ;; Optimize cases were we can do a simple or direct move. @@ -2924,7 +3020,7 @@ "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" { /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ - if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR) + if (TARGET_P9_VECTOR) { emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], operands[2])); @@ -2938,8 +3034,7 @@ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) (clobber (match_scratch:SI 3 "=r,X"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB - && TARGET_VSX_SMALL_INTEGER" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" { if (which_alternative == 0) return "#"; @@ -2969,8 +3064,7 @@ (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") (parallel [(match_operand:QI 2 "const_int_operand")]))) (clobber (match_operand:SI 3 "int_reg_operand"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB - && TARGET_VSX_SMALL_INTEGER && reload_completed" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed" [(const_int 0)] { rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); @@ -2995,8 +3089,7 @@ (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) (clobber (match_scratch:SI 3 "=r,X"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB - && TARGET_VSX_SMALL_INTEGER" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" "#" "&& reload_completed" [(parallel [(set (match_dup 4) @@ -3016,8 +3109,7 @@ (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r")) (clobber (match_scratch:SI 4 "=X,&r"))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB - && TARGET_VSX_SMALL_INTEGER" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" "#" "&& reload_completed" [(parallel [(set (match_dup 3) @@ -3034,8 +3126,7 @@ (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv") (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))] - "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT - && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" + "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" "#" "&& reload_completed" [(const_int 0)] @@ -3053,15 +3144,7 @@ instruction. */ value = INTVAL (element); if (value != 1) - { - if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER) - { - rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp)); - emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element)); - } - else - emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); - } + emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); else vec_tmp = src; @@ -3070,13 +3153,13 @@ if (can_create_pseudo_p ()) dest = rs6000_address_for_fpconvert (dest); - if (TARGET_VSX_SMALL_INTEGER) + if (TARGET_P8_VECTOR) emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); else emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); } - else if (TARGET_VSX_SMALL_INTEGER) + else if (TARGET_P8_VECTOR) emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); else emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), @@ -3094,7 +3177,7 @@ (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT - && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)" + && !TARGET_P9_VECTOR" "#" "&& reload_completed" [(const_int 0)] @@ -3305,7 +3388,7 @@ (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT - && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + && TARGET_P9_VECTOR" "#" "&& reload_completed" [(parallel [(set (match_dup 3) @@ -3329,7 +3412,7 @@ (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT - && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER" + && TARGET_P9_VECTOR" "#" "&& reload_completed" [(parallel [(set (match_dup 3) @@ -3351,8 +3434,7 @@ (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>") (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] UNSPEC_VSX_SET))] - "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" + "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64" { int ele = INTVAL (operands[3]); int nunits = GET_MODE_NUNITS (<MODE>mode); @@ -3376,8 +3458,7 @@ (match_operand:QI 3 "const_0_to_3_operand" "n")] UNSPEC_VSX_SET)) (clobber (match_scratch:SI 4 "=&wJwK"))] - "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" "#" "&& reload_completed" [(set (match_dup 5) @@ -3412,8 +3493,7 @@ (match_operand:QI 3 "const_0_to_3_operand" "n")] UNSPEC_VSX_SET)) (clobber (match_scratch:SI 4 "=&wJwK"))] - "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64" + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" "#" "&& reload_completed" [(set (match_dup 4) @@ -3443,8 +3523,7 @@ [(match_operand:QI 3 "const_0_to_3_operand" "n")])) (match_operand:QI 4 "const_0_to_3_operand" "n")] UNSPEC_VSX_SET))] - "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64 + "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64 && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))" { int ele = INTVAL (operands[4]); @@ -3472,8 +3551,7 @@ UNSPEC_VSX_SET)) (clobber (match_scratch:SI 5 "=&wJwK"))] "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) - && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER - && TARGET_UPPER_REGS_DI && TARGET_POWERPC64 + && TARGET_P9_VECTOR && TARGET_POWERPC64 && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))" "#" "&& 1" @@ -4506,7 +4584,65 @@ "xxinsertw %x0,%x1,%3" [(set_attr "type" "vecperm")]) - +;; Generate vector extract four float 32 values from left four elements +;; of eight element vector of float 16 values. +(define_expand "vextract_fp_from_shorth" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))] + "TARGET_P9_VECTOR" +{ + int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; + int i; + + rtx rvals[16]; + rtx mask = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (V16QImode); + rtvec v; + + for (i = 0; i < 16; i++) + rvals[i] = GEN_INT (vals[i]); + + /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 + inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move + src half words 0,1,2,3 for the conversion instruction. */ + v = gen_rtvec_v (16, rvals); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], + operands[1], mask)); + emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); + DONE; +}) + +;; Generate vector extract four float 32 values from right four elements +;; of eight element vector of float 16 values. +(define_expand "vextract_fp_from_shortl" + [(set (match_operand:V4SF 0 "register_operand" "=wa") + (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] + UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))] + "TARGET_P9_VECTOR" +{ + int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; + int i; + rtx rvals[16]; + rtx mask = gen_reg_rtx (V16QImode); + rtx tmp = gen_reg_rtx (V16QImode); + rtvec v; + + for (i = 0; i < 16; i++) + rvals[i] = GEN_INT (vals[i]); + + /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 + inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move + src half words 4,5,6,7 for the conversion instruction. */ + v = gen_rtvec_v (16, rvals); + emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); + emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], + operands[1], mask)); + emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); + DONE; +}) + ;; Support for ISA 3.0 vector byte reverse ;; Swap all bytes with in a vector |