diff options
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r-- | gcc/config/aarch64/aarch64-cores.def | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-option-extensions.def | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 61 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 316 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 185 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/constraints.md | 18 | ||||
-rw-r--r-- | gcc/config/aarch64/cortex-a57-fma-steering.c | 15 | ||||
-rw-r--r-- | gcc/config/aarch64/falkor.md | 681 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/rtems.h | 17 |
14 files changed, 1166 insertions, 167 deletions
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index f8342ca722d..10893324d3f 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) /* Qualcomm ('Q') cores. */ -AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1) -AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1) +AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) +AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) /* Samsung ('S') cores. */ AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index c0752ce3470..c4f059ab7c5 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -63,4 +63,8 @@ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp") /* Enabling or disabling "rcpc" only changes "rcpc". */ AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc") +/* Enabling "rdma" also enables "fp", "simd". + Disabling "rdma" just disables "rdma". */ +AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma") + #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e397ff4afa7..beff28e2272 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -319,6 +319,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); int aarch64_branch_cost (bool, bool); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); +bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_constant_address_p (rtx); bool aarch64_emit_approx_div (rtx, rtx, rtx); @@ -326,6 +327,7 @@ bool aarch64_emit_approx_sqrt (rtx, rtx, bool); void aarch64_expand_call (rtx, rtx, bool); bool aarch64_expand_movmem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); +bool aarch64_float_const_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs); bool aarch64_gen_movmemqi (rtx *); @@ -351,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree); bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); bool aarch64_regno_ok_for_index_p (int, bool); +bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail); bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high); -bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode); bool aarch64_simd_imm_zero_p (rtx, machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1cb6eeb3187..f74b68775cf 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -351,6 +351,35 @@ } ) +(define_expand "xorsign<mode>3" + [(match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + "TARGET_SIMD" +{ + + machine_mode imode = <V_cmp_result>mode; + rtx v_bitmask = gen_reg_rtx (imode); + rtx op1x = gen_reg_rtx (imode); + rtx op2x = gen_reg_rtx (imode); + + rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode); + rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode); + + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; + + emit_move_insn (v_bitmask, + aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode, + HOST_WIDE_INT_M1U << bits)); + + emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2)); + emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x)); + emit_move_insn (operands[0], + lowpart_subreg (<MODE>mode, op1x, imode)); + DONE; +} +) + (define_expand "copysign<mode>3" [(match_operand:VHSDF 0 "register_operand") (match_operand:VHSDF 1 "register_operand") @@ -1033,6 +1062,18 @@ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) +(define_insn "*aarch64_mla_elt_merge<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand:<VEL> 1 "register_operand" "w")) + (match_operand:VDQHS 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]" + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + (define_insn "aarch64_mls<mode>" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0") @@ -1080,6 +1121,18 @@ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] ) +(define_insn "*aarch64_mls_elt_merge<mode>" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 1 "register_operand" "0") + (mult:VDQHS (vec_duplicate:VDQHS + (match_operand:<VEL> 2 "register_operand" "w")) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]" + [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] +) + ;; Max/Min operations. (define_insn "<su><maxmin><mode>3" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") @@ -5593,9 +5646,9 @@ DONE; }) -;; Standard pattern name vec_init<mode>. +;; Standard pattern name vec_init<mode><Vel>. -(define_expand "vec_init<mode>" +(define_expand "vec_init<mode><Vel>" [(match_operand:VALL_F16 0 "register_operand" "") (match_operand 1 "" "")] "TARGET_SIMD" @@ -5650,9 +5703,9 @@ "urecpe\\t%0.<Vtype>, %1.<Vtype>" [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]) -;; Standard pattern name vec_extract<mode>. +;; Standard pattern name vec_extract<mode><Vel>. -(define_expand "vec_extract<mode>" +(define_expand "vec_extract<mode><Vel>" [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "") (match_operand:VALL_F16 1 "register_operand" "") (match_operand:SI 2 "immediate_operand" "")] diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b8a4160d9de..28c4e0e6476 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -33,6 +33,7 @@ #include "df.h" #include "tm_p.h" #include "stringpool.h" +#include "attribs.h" #include "optabs.h" #include "regs.h" #include "emit-rtl.h" @@ -147,6 +148,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed); +static machine_mode +aarch64_simd_container_mode (machine_mode mode, unsigned width); /* Major revision number of the ARM Architecture implemented by the target. */ unsigned aarch64_architecture_version; @@ -206,22 +209,6 @@ static const struct cpu_addrcost_table generic_addrcost_table = 0 /* imm_offset */ }; -static const struct cpu_addrcost_table cortexa57_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0, /* imm_offset */ -}; - static const struct cpu_addrcost_table exynosm1_addrcost_table = { { @@ -254,22 +241,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = 0, /* imm_offset */ }; -static const struct cpu_addrcost_table qdf24xx_addrcost_table = -{ - { - 1, /* hi */ - 0, /* si */ - 0, /* di */ - 1, /* ti */ - }, - 0, /* pre_modify */ - 0, /* post_modify */ - 0, /* register_offset */ - 0, /* register_sextend */ - 0, /* register_zextend */ - 0 /* imm_offset */ -}; - static const struct cpu_addrcost_table thunderx2t99_addrcost_table = { { @@ -390,13 +361,13 @@ static const struct cpu_vector_cost thunderx_vector_cost = 3, /* scalar_load_cost */ 1, /* scalar_store_cost */ 4, /* vec_int_stmt_cost */ - 4, /* vec_fp_stmt_cost */ + 1, /* vec_fp_stmt_cost */ 4, /* vec_permute_cost */ 2, /* vec_to_scalar_cost */ 2, /* scalar_to_vec_cost */ 3, /* vec_align_load_cost */ - 10, /* vec_unalign_load_cost */ - 10, /* vec_unalign_store_cost */ + 5, /* vec_unalign_load_cost */ + 5, /* vec_unalign_store_cost */ 1, /* vec_store_cost */ 3, /* cond_taken_branch_cost */ 3 /* cond_not_taken_branch_cost */ @@ -488,20 +459,6 @@ static const struct cpu_branch_cost generic_branch_cost = 3 /* Unpredictable. */ }; -/* Branch costs for Cortex-A57. */ -static const struct cpu_branch_cost cortexa57_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - -/* Branch costs for Vulcan. */ -static const struct cpu_branch_cost thunderx2t99_branch_cost = -{ - 1, /* Predictable. */ - 3 /* Unpredictable. */ -}; - /* Generic approximation modes. */ static const cpu_approx_modes generic_approx_modes = { @@ -612,7 +569,7 @@ static const struct tune_params cortexa35_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 1, /* issue_rate */ @@ -638,7 +595,7 @@ static const struct tune_params cortexa53_tunings = &generic_addrcost_table, &cortexa53_regmove_cost, &generic_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 2, /* issue_rate */ @@ -661,10 +618,10 @@ static const struct tune_params cortexa53_tunings = static const struct tune_params cortexa57_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -687,10 +644,10 @@ static const struct tune_params cortexa57_tunings = static const struct tune_params cortexa72_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost */ 3, /* issue_rate */ @@ -713,10 +670,10 @@ static const struct tune_params cortexa72_tunings = static const struct tune_params cortexa73_tunings = { &cortexa57_extra_costs, - &cortexa57_addrcost_table, + &generic_addrcost_table, &cortexa57_regmove_cost, &cortexa57_vector_cost, - &cortexa57_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 2, /* issue_rate. */ @@ -842,7 +799,7 @@ static const struct tune_params xgene1_tunings = static const struct tune_params qdf24xx_tunings = { &qdf24xx_extra_costs, - &qdf24xx_addrcost_table, + &generic_addrcost_table, &qdf24xx_regmove_cost, &generic_vector_cost, &generic_branch_cost, @@ -871,7 +828,7 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_addrcost_table, &thunderx2t99_regmove_cost, &thunderx2t99_vector_cost, - &thunderx2t99_branch_cost, + &generic_branch_cost, &generic_approx_modes, 4, /* memmov_cost. */ 4, /* issue_rate. */ @@ -1876,6 +1833,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, return 1; } + /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff + (with XXXX non-zero). In that case check to see if the move can be done in + a smaller mode. */ + val2 = val & 0xffffffff; + if (mode == DImode + && aarch64_move_imm (val2, SImode) + && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) + { + if (generate) + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); + + /* Check if we have to emit a second instruction by checking to see + if any of the upper 32 bits of the original DI mode value is set. */ + if (val == val2) + return 1; + + i = (val >> 48) ? 48 : 32; + + if (generate) + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + + return 2; + } + if ((val >> 32) == 0 || mode == SImode) { if (generate) @@ -3088,7 +3070,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, reg = gen_rtx_REG (mode, regno); mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, plus_constant (Pmode, base_rtx, -adjustment)); - mem = gen_rtx_MEM (mode, mem); + mem = gen_frame_mem (mode, mem); insn = emit_move_insn (mem, reg); RTX_FRAME_RELATED_P (insn) = 1; @@ -3176,7 +3158,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, { rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment); mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); - emit_move_insn (reg1, gen_rtx_MEM (mode, mem)); + emit_move_insn (reg1, gen_frame_mem (mode, mem)); } else { @@ -3252,8 +3234,6 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, unsigned start, unsigned limit, bool skip_wb) { rtx_insn *insn; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; @@ -3274,8 +3254,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3289,8 +3269,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, - offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, + offset)); insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2)); @@ -3319,8 +3299,6 @@ aarch64_restore_callee_saves (machine_mode mode, unsigned limit, bool skip_wb, rtx *cfi_ops) { rtx base_rtx = stack_pointer_rtx; - rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed - ? gen_frame_mem : gen_rtx_MEM); unsigned regno; unsigned regno2; HOST_WIDE_INT offset; @@ -3341,7 +3319,7 @@ aarch64_restore_callee_saves (machine_mode mode, reg = gen_rtx_REG (mode, regno); offset = start_offset + cfun->machine->frame.reg_offset[regno]; - mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); regno2 = aarch64_next_callee_save (regno + 1, limit); @@ -3354,7 +3332,7 @@ aarch64_restore_callee_saves (machine_mode mode, rtx mem2; offset = start_offset + cfun->machine->frame.reg_offset[regno2]; - mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); @@ -4723,6 +4701,74 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) return true; } +/* Return the binary representation of floating point constant VALUE in INTVAL. + If the value cannot be converted, return false without setting INTVAL. + The conversion is done in the given MODE. */ +bool +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) +{ + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (value)) + { + *intval = 0; + return true; + } + + machine_mode mode = GET_MODE (value); + if (GET_CODE (value) != CONST_DOUBLE + || !SCALAR_FLOAT_MODE_P (mode) + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT + /* Only support up to DF mode. */ + || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode)) + return false; + + unsigned HOST_WIDE_INT ival = 0; + + long res[2]; + real_to_target (res, + CONST_DOUBLE_REAL_VALUE (value), + REAL_MODE_FORMAT (mode)); + + if (mode == DFmode) + { + int order = BYTES_BIG_ENDIAN ? 1 : 0; + ival = zext_hwi (res[order], 32); + ival |= (zext_hwi (res[1 - order], 32) << 32); + } + else + ival = zext_hwi (res[0], 32); + + *intval = ival; + return true; +} + +/* Return TRUE if rtx X is an immediate constant that can be moved using a + single MOV(+MOVK) followed by an FMOV. */ +bool +aarch64_float_const_rtx_p (rtx x) +{ + machine_mode mode = GET_MODE (x); + if (mode == VOIDmode) + return false; + + /* Determine whether it's cheaper to write float constants as + mov/movk pairs over ldr/adrp pairs. */ + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode) + && aarch64_reinterpret_float_as_int (x, &ival)) + { + machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode); + int num_instr = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + return num_instr < 3; + } + + return false; +} + /* Return TRUE if rtx X is immediate constant 0.0 */ bool aarch64_float_const_zero_rtx_p (rtx x) @@ -4735,6 +4781,49 @@ aarch64_float_const_zero_rtx_p (rtx x) return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0); } +/* Return TRUE if rtx X is immediate constant that fits in a single + MOVI immediate operation. */ +bool +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) +{ + if (!TARGET_SIMD) + return false; + + machine_mode vmode, imode; + unsigned HOST_WIDE_INT ival; + + if (GET_CODE (x) == CONST_DOUBLE + && SCALAR_FLOAT_MODE_P (mode)) + { + if (!aarch64_reinterpret_float_as_int (x, &ival)) + return false; + + /* We make a general exception for 0. */ + if (aarch64_float_const_zero_rtx_p (x)) + return true; + + imode = int_mode_for_mode (mode); + } + else if (GET_CODE (x) == CONST_INT + && SCALAR_INT_MODE_P (mode)) + { + imode = mode; + ival = INTVAL (x); + } + else + return false; + + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; + + vmode = aarch64_simd_container_mode (imode, width); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival); + + return aarch64_simd_valid_immediate (v_op, vmode, false, NULL); +} + + /* Return the fixed registers used for condition codes. */ static bool @@ -5929,12 +6018,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass) return NO_REGS; } - /* If it's an integer immediate that MOVI can't handle, then - FP_REGS is not an option, so we return NO_REGS instead. */ - if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) - && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) - return NO_REGS; - /* Register eliminiation can result in a request for SP+constant->FP_REGS. We cannot support such operations which use SP as source and an FP_REG as destination, so reject out @@ -6884,6 +6967,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, return true; case CONST_DOUBLE: + + /* First determine number of instructions to do the move + as an integer constant. */ + if (!aarch64_float_const_representable_p (x) + && !aarch64_can_const_movi_rtx_p (x, mode) + && aarch64_float_const_rtx_p (x)) + { + unsigned HOST_WIDE_INT ival; + bool succeed = aarch64_reinterpret_float_as_int (x, &ival); + gcc_assert (succeed); + + machine_mode imode = mode == HFmode ? SImode + : int_mode_for_mode (mode); + int ncost = aarch64_internal_mov_immediate + (NULL_RTX, gen_int_mode (ival, imode), false, imode); + *cost += COSTS_N_INSNS (ncost); + return true; + } + if (speed) { /* mov[df,sf]_aarch64. */ @@ -10193,7 +10295,7 @@ aarch64_classify_symbol (rtx x, rtx offset) /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ - if (CONSTANT_POOL_ADDRESS_P (x)) + if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x)) return SYMBOL_SMALL_ABSOLUTE; else return SYMBOL_FORCE_TO_MEM; @@ -10228,18 +10330,16 @@ aarch64_legitimate_pic_operand_p (rtx x) /* Return true if X holds either a quarter-precision or floating-point +0.0 constant. */ static bool -aarch64_valid_floating_const (machine_mode mode, rtx x) +aarch64_valid_floating_const (rtx x) { if (!CONST_DOUBLE_P (x)) return false; - if (aarch64_float_const_zero_rtx_p (x)) + /* This call determines which constants can be used in mov<mode> + as integer moves instead of constant loads. */ + if (aarch64_float_const_rtx_p (x)) return true; - /* We only handle moving 0.0 to a TFmode register. */ - if (!(mode == SFmode || mode == DFmode)) - return false; - return aarch64_float_const_representable_p (x); } @@ -10251,11 +10351,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) return false; - /* This could probably go away because - we now decompose CONST_INTs according to expand_mov_immediate. */ + /* For these cases we never want to use a literal load. + As such we have to prevent the compiler from forcing these + to memory. */ if ((GET_CODE (x) == CONST_VECTOR && aarch64_simd_valid_immediate (x, mode, false, NULL)) - || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) + || CONST_INT_P (x) + || aarch64_valid_floating_const (x) + || aarch64_can_const_movi_rtx_p (x, mode) + || aarch64_float_const_rtx_p (x)) return !targetm.cannot_force_const_mem (mode, x); if (GET_CODE (x) == HIGH @@ -11538,23 +11642,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos) } bool -aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED) -{ - HOST_WIDE_INT imm = INTVAL (x); - int i; - - for (i = 0; i < 8; i++) - { - unsigned int byte = imm & 0xff; - if (byte != 0xff && byte != 0) - return false; - imm >>= 8; - } - - return true; -} - -bool aarch64_mov_operand_p (rtx x, machine_mode mode) { if (GET_CODE (x) == HIGH @@ -12945,15 +13032,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector, } char* -aarch64_output_scalar_simd_mov_immediate (rtx immediate, - machine_mode mode) +aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode) { + + /* If a floating point number was passed and we desire to use it in an + integer mode do the conversion to integer. */ + if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT) + { + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (immediate, &ival)) + gcc_unreachable (); + immediate = gen_int_mode (ival, mode); + } + machine_mode vmode; + /* use a 64 bit mode for everything except for DI/DF mode, where we use + a 128 bit vector mode. */ + int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64; gcc_assert (!VECTOR_MODE_P (mode)); - vmode = aarch64_simd_container_mode (mode, 64); + vmode = aarch64_simd_container_mode (mode, width); rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); - return aarch64_output_simd_mov_immediate (v_op, vmode, 64); + return aarch64_output_simd_mov_immediate (v_op, vmode, width); } /* Split operands into moves from op[1] + op[2] into op[0]. */ diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 106cf3a5666..7f91edb5713 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -144,7 +144,8 @@ extern unsigned aarch64_architecture_version; #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ /* ARMv8.1-A architecture extensions. */ #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ -#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */ +#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ +#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ /* ARMv8.2-A architecture extensions. */ #define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ @@ -161,7 +162,8 @@ extern unsigned aarch64_architecture_version; /* Architecture flags that effect instruction selection. */ #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) #define AARCH64_FL_FOR_ARCH8_1 \ - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1) + (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ + | AARCH64_FL_RDMA | AARCH64_FL_V8_1) #define AARCH64_FL_FOR_ARCH8_2 \ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) #define AARCH64_FL_FOR_ARCH8_3 \ @@ -174,7 +176,7 @@ extern unsigned aarch64_architecture_version; #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) -#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1) +#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f876a2b7208..64b60a903ed 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -181,6 +181,11 @@ ;; will be disabled when !TARGET_FLOAT. (define_attr "fp" "no,yes" (const_string "no")) +;; Attribute that specifies whether or not the instruction touches half +;; precision fp registers. When this is set to yes for an alternative, +;; that alternative will be disabled when !TARGET_FP_F16INST. +(define_attr "fp16" "no,yes" (const_string "no")) + ;; Attribute that specifies whether or not the instruction touches simd ;; registers. When this is set to yes for an alternative, that alternative ;; will be disabled when !TARGET_SIMD. @@ -194,11 +199,14 @@ ;; registers when -mgeneral-regs-only is specified. (define_attr "enabled" "no,yes" (cond [(ior - (and (eq_attr "fp" "yes") - (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) - (and (eq_attr "simd" "yes") - (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) - (const_string "no") + (ior + (and (eq_attr "fp" "yes") + (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) + (and (eq_attr "simd" "yes") + (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) + (and (eq_attr "fp16" "yes") + (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0)))) + (const_string "no") ] (const_string "yes"))) ;; Attribute that specifies whether we are dealing with a branch to a @@ -223,6 +231,7 @@ (include "../arm/cortex-a53.md") (include "../arm/cortex-a57.md") (include "../arm/exynos-m1.md") +(include "falkor.md") (include "thunderx.md") (include "../arm/xgene1.md") (include "thunderx2t99.md") @@ -920,8 +929,8 @@ ) (define_insn_and_split "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w") - (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w,w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))] "(register_operand (operands[0], SImode) || aarch64_reg_or_zero (operands[1], SImode))" "@ @@ -938,8 +947,9 @@ adrp\\t%x0, %A1 fmov\\t%s0, %w1 fmov\\t%w0, %s1 - fmov\\t%s0, %s1" - "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) + fmov\\t%s0, %s1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" + "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] "{ @@ -947,13 +957,14 @@ DONE; }" [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ - adr,adr,f_mcr,f_mrc,fmov") - (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] + adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn_and_split "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m, m,r,r, *w,r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -961,6 +972,7 @@ mov\\t%0, %x1 mov\\t%x0, %1 mov\\t%x0, %1 + mov\\t%w0, %1 # ldr\\t%x0, %1 ldr\\t%d0, %1 @@ -971,7 +983,7 @@ fmov\\t%d0, %x1 fmov\\t%x0, %d1 fmov\\t%d0, %d1 - movi\\t%d0, %1" + * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);" "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)) && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" [(const_int 0)] @@ -979,10 +991,10 @@ aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; }" - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ - adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\ + load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) (define_insn "insv_imm<mode>" @@ -1062,28 +1074,31 @@ ) (define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r") - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))] + [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))] "TARGET_FLOAT && (register_operand (operands[0], HFmode) || aarch64_reg_or_fp_zero (operands[1], HFmode))" "@ movi\\t%0.4h, #0 - mov\\t%0.h[0], %w1 + fmov\\t%h0, %w1 umov\\t%w0, %1.h[0] mov\\t%0.h[0], %1.h[0] + fmov\\t%h0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); ldr\\t%h0, %1 str\\t%h1, %0 ldrh\\t%w0, %1 strh\\t%w1, %0 mov\\t%w0, %w1" - [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\ - f_loads,f_stores,load1,store1,mov_reg") - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")] + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \ + neon_move,f_loads,f_stores,load1,store1,mov_reg") + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*") + (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movsf_aarch64" - [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") + (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))] "TARGET_FLOAT && (register_operand (operands[0], SFmode) || aarch64_reg_or_fp_zero (operands[1], SFmode))" "@ @@ -1092,19 +1107,22 @@ fmov\\t%w0, %s1 fmov\\t%s0, %s1 fmov\\t%s0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); ldr\\t%s0, %1 str\\t%s1, %0 ldr\\t%w0, %1 str\\t%w1, %0 - mov\\t%w0, %w1" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\ - f_loads,f_stores,load1,store1,mov_reg") - (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] + mov\\t%w0, %w1 + mov\\t%w0, %1" + [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\ + f_loads,f_stores,load1,store1,mov_reg,\ + fconsts") + (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] ) (define_insn "*movdf_aarch64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") - (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") + (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] "TARGET_FLOAT && (register_operand (operands[0], DFmode) || aarch64_reg_or_fp_zero (operands[1], DFmode))" "@ @@ -1113,14 +1131,37 @@ fmov\\t%x0, %d1 fmov\\t%d0, %d1 fmov\\t%d0, %1 + * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); ldr\\t%d0, %1 str\\t%d1, %0 ldr\\t%x0, %1 str\\t%x1, %0 - mov\\t%x0, %x1" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\ - f_loadd,f_stored,load1,store1,mov_reg") - (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] + mov\\t%x0, %x1 + mov\\t%x0, %1" + [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ + f_loadd,f_stored,load1,store1,mov_reg,\ + fconstd") + (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] +) + +(define_split + [(set (match_operand:GPF_HF 0 "nonimmediate_operand") + (match_operand:GPF_HF 1 "general_operand"))] + "can_create_pseudo_p () + && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode) + && !aarch64_float_const_representable_p (operands[1]) + && aarch64_float_const_rtx_p (operands[1])" + [(const_int 0)] + { + unsigned HOST_WIDE_INT ival; + if (!aarch64_reinterpret_float_as_int (operands[1], &ival)) + FAIL; + + rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode); + emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode)); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); + DONE; + } ) (define_insn "*movtf_aarch64" @@ -3835,6 +3876,22 @@ [(set_attr "type" "logics_reg,logics_imm")] ) +(define_split + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_mov_imm_operand")) + (const_int 0))) + (clobber (match_operand:SI 2 "register_operand"))] + "" + [(set (match_dup 2) (match_dup 1)) + (set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_dup 0) + (match_dup 2)) + (const_int 0)))] +) + (define_insn "*and<mode>3nr_compare0_zextract" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -3870,6 +3927,26 @@ [(set_attr "type" "logics_shift_imm")] ) +(define_split + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 0 "register_operand") + (match_operand:QI 1 "aarch64_shift_imm_<mode>")) + (match_operand:GPI 2 "aarch64_mov_imm_operand")) + (const_int 0))) + (clobber (match_operand:SI 3 "register_operand"))] + "" + [(set (match_dup 3) (match_dup 2)) + (set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_dup 0) + (match_dup 1)) + (match_dup 3)) + (const_int 0)))] +) + ;; ------------------------------------------------------------------- ;; Shifts ;; ------------------------------------------------------------------- @@ -5102,6 +5179,42 @@ } ) +;; For xorsign (x, y), we want to generate: +;; +;; LDR d2, #1<<63 +;; AND v3.8B, v1.8B, v2.8B +;; EOR v0.8B, v0.8B, v3.8B +;; + +(define_expand "xorsign<mode>3" + [(match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand")] + "TARGET_FLOAT && TARGET_SIMD" +{ + + machine_mode imode = <V_cmp_result>mode; + rtx mask = gen_reg_rtx (imode); + rtx op1x = gen_reg_rtx (imode); + rtx op2x = gen_reg_rtx (imode); + + int bits = GET_MODE_BITSIZE (<MODE>mode) - 1; + emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits, + imode))); + + emit_insn (gen_and<v_cmp_result>3 (op2x, mask, + lowpart_subreg (imode, operands[2], + <MODE>mode))); + emit_insn (gen_xor<v_cmp_result>3 (op1x, + lowpart_subreg (imode, operands[1], + <MODE>mode), + op2x)); + emit_move_insn (operands[0], + lowpart_subreg (<MODE>mode, op1x, imode)); + DONE; +} +) + ;; ------------------------------------------------------------------- ;; Reload support ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0753da32f59..d7b30b0e5ee 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) /* ARMv8.1-A instrinsics. */ #pragma GCC push_options -#pragma GCC target ("arch=armv8.1-a") +#pragma GCC target ("+nothing+rdma") __extension__ extern __inline int16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 88e840f2898..9ce3d4efaf3 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -176,6 +176,12 @@ (and (match_code "const_double") (match_test "aarch64_float_const_representable_p (op)"))) +(define_constraint "Uvi" + "A floating point constant which can be used with a\ + MOVI immediate operation." + (and (match_code "const_double") + (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))"))) + (define_constraint "Dn" "@internal A constraint that matches vector of immediates." @@ -220,9 +226,17 @@ (define_constraint "Dd" "@internal - A constraint that matches an immediate operand valid for AdvSIMD scalar." + A constraint that matches an integer immediate operand valid\ + for AdvSIMD scalar operations in DImode." + (and (match_code "const_int") + (match_test "aarch64_can_const_movi_rtx_p (op, DImode)"))) + +(define_constraint "Ds" + "@internal + A constraint that matches an integer immediate operand valid\ + for AdvSIMD scalar operations in SImode." (and (match_code "const_int") - (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))"))) + (match_test "aarch64_can_const_movi_rtx_p (op, SImode)"))) (define_address_constraint "Dp" "@internal diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c index 6d90acdd4a2..fa8c56aab02 100644 --- a/gcc/config/aarch64/cortex-a57-fma-steering.c +++ b/gcc/config/aarch64/cortex-a57-fma-steering.c @@ -973,10 +973,17 @@ func_fma_steering::analyze () break; } - /* We didn't find a chain with a def for this instruction. */ - gcc_assert (i < dest_op_info->n_chains); - - this->analyze_fma_fmul_insn (forest, chain, head); + /* Due to implementation of regrename, dest register can slip away + from regrename's analysis. As a result, there is no chain for + the destination register of insn. We simply skip the insn even + it is a fmul/fmac instruction. This can happen when the dest + register is also a source register of insn and one of the below + conditions is satisfied: + 1) the source reg is setup in larger mode than this insn; + 2) the source reg is uninitialized; + 3) the source reg is passed in as parameter. */ + if (i < dest_op_info->n_chains) + this->analyze_fma_fmul_insn (forest, chain, head); } } free (bb_dfs_preorder); diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md new file mode 100644 index 00000000000..b422ab30c44 --- /dev/null +++ b/gcc/config/aarch64/falkor.md @@ -0,0 +1,681 @@ +;; Falkor pipeline description +;; Copyright (C) 2017 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "falkor") + +;; Complex int instructions (e.g. multiply and divide) execute in the X +;; pipeline. Simple int instructions execute in the X, Y, and Z pipelines. + +(define_cpu_unit "falkor_x" "falkor") +(define_cpu_unit "falkor_y" "falkor") +(define_cpu_unit "falkor_z" "falkor") + +;; Branches execute in the B pipeline or in one of the int pipelines depending +;; on how complex it is. Simple int insns (like movz) can also execute here. + +(define_cpu_unit "falkor_b" "falkor") + +;; Vector and FP insns execute in the VX and VY pipelines. + +(define_automaton "falkor_vfp") + +(define_cpu_unit "falkor_vx" "falkor_vfp") +(define_cpu_unit "falkor_vy" "falkor_vfp") + +;; Loads execute in the LD pipeline. +;; Stores execute in the ST, SD, and VSD pipelines, for address, data, and +;; vector data. + +(define_automaton "falkor_mem") + +(define_cpu_unit "falkor_ld" "falkor_mem") +(define_cpu_unit "falkor_st" "falkor_mem") +(define_cpu_unit "falkor_sd" "falkor_mem") +(define_cpu_unit "falkor_vsd" "falkor_mem") + +;; The GTOV and VTOG pipelines are for general to vector reg moves, and vice +;; versa. + +(define_cpu_unit "falkor_gtov" "falkor") +(define_cpu_unit "falkor_vtog" "falkor") + +;; Common reservation combinations. + +(define_reservation "falkor_vxvy" "falkor_vx|falkor_vy") +(define_reservation "falkor_zb" "falkor_z|falkor_b") +(define_reservation "falkor_xyz" "falkor_x|falkor_y|falkor_z") +(define_reservation "falkor_xyzb" "falkor_x|falkor_y|falkor_z|falkor_b") + +;; SIMD Floating-Point Instructions + +(define_insn_reservation "falkor_afp_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_neg_s,neon_fp_neg_d,neon_fp_abs_s,neon_fp_abs_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_1_vxvy_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_neg_s_q,neon_fp_neg_d_q,neon_fp_abs_s_q,neon_fp_abs_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_2_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_minmax_s,neon_fp_minmax_d,neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,neon_fp_compare_s,neon_fp_compare_d,neon_fp_round_s,neon_fp_round_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_2_vxvy_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q,neon_fp_compare_s_q,neon_fp_compare_d_q,neon_fp_round_s_q,neon_fp_round_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_3_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,neon_fp_abd_s,neon_fp_abd_d,neon_fp_addsub_s,neon_fp_addsub_d,neon_fp_reduc_add_s,neon_fp_reduc_add_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_3_vxvy_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_abd_s_q,neon_fp_abd_d_q,neon_fp_addsub_s_q,neon_fp_addsub_d_q,neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_4_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_to_int_s,neon_fp_to_int_d,neon_int_to_fp_s,neon_int_to_fp_d,neon_fp_cvt_widen_h,neon_fp_cvt_widen_s")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_4_vxvy_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_to_int_s_q,neon_fp_to_int_d_q,neon_int_to_fp_s_q,neon_int_to_fp_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_5_vxvy_mul" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_s_scalar")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_5_vxvy_mla" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_scalar")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mul" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mul_s_q,neon_fp_mul_s_scalar_q")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mla" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mla_s_q,neon_fp_mla_s_scalar_q")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_6_vxvy_mul" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mul_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_6_vxvy_mla" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mla_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mul" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mul_d_q,neon_fp_mul_d_scalar_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mla" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_mla_d_q,neon_fp_mla_d_scalar_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_afp_4_vxvy_vxvy_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q")) + "falkor_vxvy+falkor_vxvy,falkor_vxvy") + +(define_insn_reservation "falkor_afp_6_vx_vy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_div_s")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_afp_11_vx_vy" 11 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_div_d")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_afp_6_vx_vy_vx_vy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_div_s_q")) + "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)") + +(define_insn_reservation "falkor_afp_11_vx_vy_vx_vy" 11 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_div_d_q")) + "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)") + +(define_insn_reservation "falkor_afp_12_vx_vy" 12 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_sqrt_s")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_afp_22_vx_vy" 22 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_sqrt_d")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_afp_12_vx_vy_vx_vy" 12 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_sqrt_s_q")) + "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)") + +(define_insn_reservation "falkor_afp_22_vx_vy_vx_vy" 22 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_sqrt_d_q")) + "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)") + +;; SIMD Integer Instructions + +(define_insn_reservation "falkor_ai_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_add,neon_reduc_add,neon_logic,neon_neg,neon_sub")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_1_vxvy_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_shift_imm_long,neon_add_q,neon_reduc_add_q,neon_logic_q,neon_neg_q,neon_sub_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_ai_2_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_add_long,neon_sub_long,neon_add_halve,neon_sub_halve,neon_shift_imm,neon_shift_reg,neon_minmax,neon_abs,neon_compare,neon_compare_zero,neon_tst")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_2_vxvy_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_add_halve_q,neon_sub_halve_q,neon_shift_imm_q,neon_shift_reg_q,neon_minmax_q,neon_abs_q,neon_compare_q,neon_compare_zero_q,neon_tst_q,neon_reduc_add_long")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_ai_3_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_shift_acc,neon_reduc_add_acc,neon_abd,neon_qadd,neon_qsub,neon_qabs,neon_qneg,neon_sat_shift_imm,neon_sat_shift_imm_narrow_q,neon_sat_shift_reg,neon_reduc_minmax")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_reduc_minmax_q")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_3_vxvy_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_shift_acc_q,neon_reduc_add_acc_q,neon_abd_q,neon_abd_long,neon_qadd_q,neon_qsub_q,neon_qabs_q,neon_qneg_q,neon_sat_shift_imm_q,neon_sat_shift_reg_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy_mul" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_mul_b,neon_mul_h,neon_mul_s,neon_mul_h_scalar,neon_mul_s_scalar,neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,neon_sat_mul_h_scalar,neon_sat_mul_s_scalar")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy_mla" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_mla_b,neon_mla_h,neon_mla_s,neon_mla_h_scalar,neon_mla_s_scalar")) + "falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mul" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,neon_mul_h_scalar_q,neon_mul_s_scalar_q,neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,neon_mul_d_long,neon_mul_h_scalar_long,neon_mul_s_scalar_long,neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mla" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,neon_mla_h_scalar_q,neon_mla_s_scalar_q,neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,neon_mla_h_scalar_long,neon_mla_s_scalar_long,neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_ai_4_vxvy_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_add_halve_narrow_q,neon_sub_halve_narrow_q,neon_arith_acc")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_2_ai_vxvy_vxvy_vxvy_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_add_widen,neon_sub_widen")) + "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)") + +(define_insn_reservation "falkor_4_ai_vxvy_vxvy_vxvy_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_arith_acc_q")) + "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)") + +;; SIMD Load Instructions + +(define_insn_reservation "falkor_ald_4_ld" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,neon_load1_all_lanes,neon_load2_one_lane")) + "falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_none" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_2reg,neon_load2_2reg,neon_load2_all_lanes")) + "falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_ld" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_2reg_q,neon_load2_2reg_q,neon_load2_all_lanes_q,neon_load3_one_lane,neon_load4_one_lane,neon_ldp,neon_ldp_q")) + "falkor_ld,falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_ld_none" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_3reg,neon_load3_3reg,neon_load3_all_lanes")) + "falkor_ld,falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_ld_ld" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_3reg_q,neon_load3_3reg_q,neon_load3_all_lanes_q")) + "falkor_ld,falkor_ld,falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_ld_none_none" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_4reg,neon_load4_4reg")) + "falkor_ld,falkor_ld") + +(define_insn_reservation "falkor_ald_4_ld_ld_ld_ld" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_load1_4reg_q,neon_load4_4reg_q,neon_load4_all_lanes,neon_load4_all_lanes_q")) + "falkor_ld,falkor_ld,falkor_ld,falkor_ld") + +;; Arithmetic and Logical Instructions + +(define_insn_reservation "falkor_alu_1_xyz" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg")) + "falkor_xyz") + +;; SIMD Miscellaneous Instructions + +;; No separate type for ins and dup. But this is correct for both. + +(define_insn_reservation "falkor_am_3_gtov" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_from_gp")) + "falkor_gtov") + +;; No separate type for ins and dup. Assuming dup is more common. Ins is +;; gtov+vxvy and latency of 4. + +(define_insn_reservation "falkor_am_3_gtov_gtov" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_from_gp_q")) + "falkor_gtov,falkor_gtov") + +;; neon_to_gp_q is used for 32-bit ARM instructions that move 64-bits of data +;; so no use needed here. + +(define_insn_reservation "falkor_am_3_vtog" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_to_gp")) + "falkor_vtog") + +(define_insn_reservation "falkor_am_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_bsl,neon_dup,neon_ext,neon_ins,neon_ins_q,neon_move,neon_rev,neon_tbl1,neon_permute,neon_shift_imm_narrow_q")) + "falkor_vxvy") + +(define_insn_reservation "falkor_am_1_vxvy_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_bsl_q,neon_dup_q,neon_ext_q,neon_move_q,neon_rev_q,neon_tbl1_q,neon_permute_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_am_2_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_cls,neon_cnt,neon_rbit")) + "falkor_vxvy") + +(define_insn_reservation "falkor_am_4_vxvy_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_cls_q,neon_cnt_q,neon_rbit_q,neon_tbl2")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_am_3_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recpe_s,neon_fp_recpe_d,neon_fp_rsqrte_s,neon_fp_rsqrte_d,neon_fp_recpx_s,neon_fp_recpx_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_am_3_vxvy_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recpe_s_q,neon_fp_recpe_d_q,neon_fp_rsqrte_s_q,neon_fp_rsqrte_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_am_5_vxvy" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recps_s")) + "falkor_vxvy") + +(define_insn_reservation "falkor_am_5_vxvy_vxvy" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recps_s_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_am_6_vxvy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recps_d,neon_fp_rsqrts_d")) + "falkor_vxvy") + +(define_insn_reservation "falkor_am_6_vxvy_vxvy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_fp_recps_d_q,neon_fp_rsqrts_d_q")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_am_5_vxvy_vxvy_vxvy" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_tbl2_q,neon_tbl3")) + "(falkor_vxvy+falkor_vxvy),falkor_vxvy") + +(define_insn_reservation "falkor_am_6_vxvy_vxvy_vxvy_vxvy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_tbl3_q,neon_tbl4")) + "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)") + +(define_insn_reservation "falkor_am_7_vxvy_vxvy_vxvy_vxvy_vxvy" 7 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_tbl4_q")) + "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy),falkor_vxvy") + +;; SIMD Store Instructions + +;; ??? stp is neon_store1_2reg in aarch64.md, but neon_stp in aarch64-simd.md. +;; Similarly with ldp. + +(define_insn_reservation "falkor_ast_st_vsd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,neon_store1_one_lane,neon_store1_one_lane_q,neon_store1_2reg,neon_store2_2reg,neon_store2_one_lane,neon_store2_one_lane_q,neon_stp")) + "falkor_st+falkor_vsd") + +(define_insn_reservation "falkor_as_0_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_store1_2reg_q,neon_store1_3reg,neon_store1_4reg,neon_store2_2reg_q,neon_store3_3reg,neon_store4_4reg,neon_store3_one_lane,neon_store3_one_lane_q,neon_store4_one_lane,neon_store4_one_lane_q,neon_stp_q")) + "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)") + +(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_store1_3reg_q,neon_store3_3reg_q")) + "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)") + +(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd_st_vsd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "neon_store1_4reg_q,neon_store4_4reg_q")) + "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)") + +;; Branch Instructions + +(define_insn_reservation "falkor_branch_0_zb" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "branch")) + "falkor_zb") + +(define_insn_reservation "falkor_call_0_xyzb" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "call")) + "falkor_xyzb") + +;; Cryptography Extensions + +(define_insn_reservation "falkor_cry_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_sha1_fast")) + "falkor_vxvy") + +(define_insn_reservation "falkor_cry_2_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_aesmc")) + "falkor_vxvy") + +(define_insn_reservation "falkor_cry_2_vxvy_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_sha1_xor,crypto_sha256_fast,crypto_pmull")) + "falkor_vxvy+falkor_vxvy") + +(define_insn_reservation "falkor_cry_4_vy_vx" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_sha1_slow")) + "falkor_vy+falkor_vx") + +(define_insn_reservation "falkor_cry_6_vy_vx" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_sha256_slow")) + "falkor_vy+falkor_vx") + +(define_insn_reservation "falkor_cry_3_vxvy_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crypto_aese")) + "falkor_vxvy+falkor_vxvy") + +;; FP Load Instructions + +(define_insn_reservation "falkor_fld_4_ld" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_loads,f_loadd")) + "falkor_ld") + +;; No separate FP store section, these are found in the SIMD store section. + +(define_insn_reservation "falkor_fld_0_st_vsd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_stores,f_stored")) + "falkor_st+falkor_vsd") + +;; FP Data Processing Instructions + +(define_insn_reservation "falkor_fpdt_0_vxvy" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_5_vtog" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_cvtf2i")) + "falkor_vtog") + +(define_insn_reservation "falkor_fpdt_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "ffariths,ffarithd,fcsel")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_2_vxvy" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_minmaxd,f_minmaxs,f_rintd,f_rints")) + "falkor_vxvy") + +;; Scalar FP ABD is handled same as vector FP ABD. + +(define_insn_reservation "falkor_fpdt_3_vxvy" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "faddd,fadds")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_4_vxvy" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_cvt")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_5_vxvy_mul" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fmuls")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_5_vxvy_mla" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fmacs,ffmas")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_6_vxvy_mul" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fmuld")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_6_vxvy_mla" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fmacd,ffmad")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_6_vx_vy" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fdivs")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_fpdt_11_vx_vy" 11 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fdivd")) + "falkor_vx+falkor_vy") + +(define_insn_reservation "falkor_fpdt_12_vx_vy" 12 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fsqrts")) + "falkor_vxvy") + +(define_insn_reservation "falkor_fpdt_22_vx_vy" 22 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fsqrtd")) + "falkor_vxvy") + +;; FP Miscellaneous Instructions + +(define_insn_reservation "falkor_fpmsc_3_vtog" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_mrc")) + "falkor_vtog") + +(define_insn_reservation "falkor_fpmsc_3_gtov" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_mcr")) + "falkor_gtov") + +(define_insn_reservation "falkor_fpmsc_1_vxvy" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "fmov,fconsts,fconstd")) + "falkor_vxvy") + +;; No separate type for float-to-fixed conversions. Same type as +;; float-to-int conversions. They schedule the same though, so no problem. + +(define_insn_reservation "falkor_fpmsc_6_gtov" 6 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "f_cvti2f")) + "falkor_gtov") + +;; Load Instructions + +(define_insn_reservation "falkor_ld_3_ld" 3 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "load1,load2")) + "falkor_ld") + +;; Miscellaneous Data-Processing Instructions + +(define_insn_reservation "falkor_misc_1_xyz" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "bfx,bfm,extend,rotate_imm,shift_imm")) + "falkor_xyz") + +(define_insn_reservation "falkor_misc_2_x" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "crc")) + "falkor_x") + +(define_insn_reservation "falkor_misc_2_xyz" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "clz,rbit,rev")) + "falkor_xyz") + +;; Divide and Multiply Instructions + +(define_insn_reservation "falkor_muldiv_4_x_mul" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "mul")) + "falkor_x") + +(define_insn_reservation "falkor_muldiv_4_x_mla" 4 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "mla,smlal,umlal")) + "falkor_x") + +(define_insn_reservation "falkor_muldiv_5_x_mul" 5 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "smull,umull")) + "falkor_x") + +(define_insn_reservation "falkor_md_11_x_z" 11 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "sdiv,udiv")) + "falkor_x+falkor_z") + +;; Move and Shift Instructions + +(define_insn_reservation "falkor_mvs_1_xyz" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "mov_imm,shift_reg")) + "falkor_xyz") + +(define_insn_reservation "falkor_mvs_1_xyzb" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "adr")) + "falkor_xyzb") + +;; Other Instructions + +;; Block is for instruction scheduling blockage insns in RTL. There are no +;; hardware instructions emitted for them, so don't use any resources. + +(define_insn_reservation "falkor_other_0_nothing" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "no_insn,trap,block")) + "nothing") + +(define_insn_reservation "falkor_other_2_z" 2 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "mrs")) + "falkor_z") + +;; Assume multiple instructions use all pipes. + +(define_insn_reservation "falkor_extra" 1 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "multiple")) + "falkor_x+falkor_y+falkor_z+falkor_b+falkor_vx+falkor_vy+falkor_ld+falkor_st+falkor_sd+falkor_vsd+falkor_gtov+falkor_vtog") + +;; Store Instructions + +;; No use of store_rel, store3, or store4 in aarch64. + +(define_insn_reservation "falkor_st_0_st_sd" 0 + (and (eq_attr "tune" "falkor") + (eq_attr "type" "store1,store2")) + "falkor_st+falkor_sd") + +;; Muliply bypasses. + +;; 1 cycle latency (0 bubble) for an integer mul or mac feeding into a mac. + +(define_bypass 1 + "falkor_ai_4_vxvy_mul,falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mul,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mul,falkor_muldiv_4_x_mla,falkor_muldiv_5_x_mul" + "falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mla") + +;; 3 cycle latency (2 bubbles) for an FP mul or mac feeding into a mac. + +(define_bypass 3 + "falkor_afp_5_vxvy_mul,falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mul,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mul,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mul,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mul,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mul,falkor_fpdt_6_vxvy_mla" + "falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mla") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 43be7fd3611..cceb57525c7 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -44,6 +44,9 @@ ;; Iterator for all scalar floating point modes (HF, SF, DF) (define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF]) +;; Iterator for all scalar floating point modes (HF, SF, DF) +(define_mode_iterator GPF_HF [HF SF DF]) + ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) @@ -520,6 +523,17 @@ (SI "SI") (HI "HI") (QI "QI")]) +;; Define element mode for each vector mode (lower case). +(define_mode_attr Vel [(V8QI "qi") (V16QI "qi") + (V4HI "hi") (V8HI "hi") + (V2SI "si") (V4SI "si") + (DI "di") (V2DI "di") + (V4HF "hf") (V8HF "hf") + (V2SF "sf") (V4SF "sf") + (V2DF "df") (DF "df") + (SI "si") (HI "hi") + (QI "qi")]) + ;; 64-bit container modes the inner or scalar source mode. (define_mode_attr VCOND [(HI "V4HI") (SI "V2SI") (V4HI "V4HI") (V8HI "V4HI") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index ad8a43c2b2c..11243c4ce00 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -114,6 +114,10 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_logical_immediate"))) +(define_predicate "aarch64_mov_imm_operand" + (and (match_code "const_int") + (match_test "aarch64_move_imm (INTVAL (op), mode)"))) + (define_predicate "aarch64_logical_and_immediate" (and (match_code "const_int") (match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)"))) diff --git a/gcc/config/aarch64/rtems.h b/gcc/config/aarch64/rtems.h index b48e28afda0..07c5679d5c1 100644 --- a/gcc/config/aarch64/rtems.h +++ b/gcc/config/aarch64/rtems.h @@ -1,20 +1,25 @@ /* Definitions for RTEMS based AARCH64 system. Copyright (C) 2016-2017 Free Software Foundation, Inc. - + This file is part of GCC. - + GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. - + GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ #define HAS_INIT_SECTION |