Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def             |   4
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def |   4
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h              |   4
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md               |  61
-rw-r--r--  gcc/config/aarch64/aarch64.c                     | 316
-rw-r--r--  gcc/config/aarch64/aarch64.h                     |   8
-rw-r--r--  gcc/config/aarch64/aarch64.md                    | 185
-rw-r--r--  gcc/config/aarch64/arm_neon.h                    |   2
-rw-r--r--  gcc/config/aarch64/constraints.md                |  18
-rw-r--r--  gcc/config/aarch64/cortex-a57-fma-steering.c     |  15
-rw-r--r--  gcc/config/aarch64/falkor.md                     | 681
-rw-r--r--  gcc/config/aarch64/iterators.md                  |  14
-rw-r--r--  gcc/config/aarch64/predicates.md                 |   4
-rw-r--r--  gcc/config/aarch64/rtems.h                       |  17
14 files changed, 1166 insertions, 167 deletions
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f8342ca722d..10893324d3f 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -65,8 +65,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH
AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor", falkor, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
/* Samsung ('S') cores. */
AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index c0752ce3470..c4f059ab7c5 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -63,4 +63,8 @@ AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
/* Enabling or disabling "rcpc" only changes "rcpc". */
AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
+/* Enabling "rdma" also enables "fp", "simd".
+ Disabling "rdma" just disables "rdma". */
+AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "rdma")
+
#undef AARCH64_OPT_EXTENSION
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e397ff4afa7..beff28e2272 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -319,6 +319,7 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
+bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_constant_address_p (rtx);
bool aarch64_emit_approx_div (rtx, rtx, rtx);
@@ -326,6 +327,7 @@ bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
void aarch64_expand_call (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
+bool aarch64_float_const_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
bool aarch64_gen_movmemqi (rtx *);
@@ -351,9 +353,9 @@ bool aarch64_pad_arg_upward (machine_mode, const_tree);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
bool aarch64_regno_ok_for_index_p (int, bool);
+bool aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *fail);
bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
bool high);
-bool aarch64_simd_imm_scalar_p (rtx x, machine_mode mode);
bool aarch64_simd_imm_zero_p (rtx, machine_mode);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1cb6eeb3187..f74b68775cf 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -351,6 +351,35 @@
}
)
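+;; xorsign (x, y): AND a per-lane sign-bit mask with operand 2 and EOR the
+;; result into operand 1, so that only the sign bits of y can affect x.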
+(define_expand "xorsign<mode>3"
+ [(match_operand:VHSDF 0 "register_operand")
+ (match_operand:VHSDF 1 "register_operand")
+ (match_operand:VHSDF 2 "register_operand")]
+ "TARGET_SIMD"
+{
+
+ machine_mode imode = <V_cmp_result>mode;
+ rtx v_bitmask = gen_reg_rtx (imode);
+ rtx op1x = gen_reg_rtx (imode);
+ rtx op2x = gen_reg_rtx (imode);
+
+ rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
+
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ emit_move_insn (v_bitmask,
+ aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
+ HOST_WIDE_INT_M1U << bits));
+
+ emit_insn (gen_and<v_cmp_result>3 (op2x, v_bitmask, arg2));
+ emit_insn (gen_xor<v_cmp_result>3 (op1x, arg1, op2x));
+ emit_move_insn (operands[0],
+ lowpart_subreg (<MODE>mode, op1x, imode));
+ DONE;
+}
+)
+
(define_expand "copysign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
@@ -1033,6 +1062,18 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mla_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (plus:VDQHS
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 1 "register_operand" "w"))
+ (match_operand:VDQHS 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
(define_insn "aarch64_mls<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
@@ -1080,6 +1121,18 @@
[(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
+(define_insn "*aarch64_mls_elt_merge<mode>"
+ [(set (match_operand:VDQHS 0 "register_operand" "=w")
+ (minus:VDQHS
+ (match_operand:VDQHS 1 "register_operand" "0")
+ (mult:VDQHS (vec_duplicate:VDQHS
+ (match_operand:<VEL> 2 "register_operand" "w"))
+ (match_operand:VDQHS 3 "register_operand" "w"))))]
+ "TARGET_SIMD"
+ "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
+ [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
+)
+
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -5593,9 +5646,9 @@
DONE;
})
-;; Standard pattern name vec_init<mode>.
+;; Standard pattern name vec_init<mode><Vel>.
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><Vel>"
[(match_operand:VALL_F16 0 "register_operand" "")
(match_operand 1 "" "")]
"TARGET_SIMD"
@@ -5650,9 +5703,9 @@
"urecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
-;; Standard pattern name vec_extract<mode>.
+;; Standard pattern name vec_extract<mode><Vel>.
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><Vel>"
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
(match_operand:VALL_F16 1 "register_operand" "")
(match_operand:SI 2 "immediate_operand" "")]
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b8a4160d9de..28c4e0e6476 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -33,6 +33,7 @@
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
+#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
@@ -147,6 +148,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
bool is_packed);
+static machine_mode
+aarch64_simd_container_mode (machine_mode mode, unsigned width);
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
@@ -206,22 +209,6 @@ static const struct cpu_addrcost_table generic_addrcost_table =
0 /* imm_offset */
};
-static const struct cpu_addrcost_table cortexa57_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0, /* imm_offset */
-};
-
static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
{
@@ -254,22 +241,6 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
0, /* imm_offset */
};
-static const struct cpu_addrcost_table qdf24xx_addrcost_table =
-{
- {
- 1, /* hi */
- 0, /* si */
- 0, /* di */
- 1, /* ti */
- },
- 0, /* pre_modify */
- 0, /* post_modify */
- 0, /* register_offset */
- 0, /* register_sextend */
- 0, /* register_zextend */
- 0 /* imm_offset */
-};
-
static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
{
@@ -390,13 +361,13 @@ static const struct cpu_vector_cost thunderx_vector_cost =
3, /* scalar_load_cost */
1, /* scalar_store_cost */
4, /* vec_int_stmt_cost */
- 4, /* vec_fp_stmt_cost */
+ 1, /* vec_fp_stmt_cost */
4, /* vec_permute_cost */
2, /* vec_to_scalar_cost */
2, /* scalar_to_vec_cost */
3, /* vec_align_load_cost */
- 10, /* vec_unalign_load_cost */
- 10, /* vec_unalign_store_cost */
+ 5, /* vec_unalign_load_cost */
+ 5, /* vec_unalign_store_cost */
1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
3 /* cond_not_taken_branch_cost */
@@ -488,20 +459,6 @@ static const struct cpu_branch_cost generic_branch_cost =
3 /* Unpredictable. */
};
-/* Branch costs for Cortex-A57. */
-static const struct cpu_branch_cost cortexa57_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
-/* Branch costs for Vulcan. */
-static const struct cpu_branch_cost thunderx2t99_branch_cost =
-{
- 1, /* Predictable. */
- 3 /* Unpredictable. */
-};
-
/* Generic approximation modes. */
static const cpu_approx_modes generic_approx_modes =
{
@@ -612,7 +569,7 @@ static const struct tune_params cortexa35_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
1, /* issue_rate */
@@ -638,7 +595,7 @@ static const struct tune_params cortexa53_tunings =
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
2, /* issue_rate */
@@ -661,10 +618,10 @@ static const struct tune_params cortexa53_tunings =
static const struct tune_params cortexa57_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -687,10 +644,10 @@ static const struct tune_params cortexa57_tunings =
static const struct tune_params cortexa72_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
@@ -713,10 +670,10 @@ static const struct tune_params cortexa72_tunings =
static const struct tune_params cortexa73_tunings =
{
&cortexa57_extra_costs,
- &cortexa57_addrcost_table,
+ &generic_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
- &cortexa57_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
2, /* issue_rate. */
@@ -842,7 +799,7 @@ static const struct tune_params xgene1_tunings =
static const struct tune_params qdf24xx_tunings =
{
&qdf24xx_extra_costs,
- &qdf24xx_addrcost_table,
+ &generic_addrcost_table,
&qdf24xx_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
@@ -871,7 +828,7 @@ static const struct tune_params thunderx2t99_tunings =
&thunderx2t99_addrcost_table,
&thunderx2t99_regmove_cost,
&thunderx2t99_vector_cost,
- &thunderx2t99_branch_cost,
+ &generic_branch_cost,
&generic_approx_modes,
4, /* memmov_cost. */
4, /* issue_rate. */
@@ -1876,6 +1833,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
+ /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
+ (with XXXX non-zero). In that case check to see if the move can be done in
+ a smaller mode. */
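+ /* For example, the DImode value 0x00000001ffff1234 has a low half that a
+ 32-bit MOVN can encode (which also zeroes the upper half) and only bits
+ 32-47 set above it, so it takes one 32-bit move plus one MOVK instead of
+ the three instructions a full 64-bit MOVZ/MOVK sequence would need. */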
+ val2 = val & 0xffffffff;
+ if (mode == DImode
+ && aarch64_move_imm (val2, SImode)
+ && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
+ {
+ if (generate)
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+
+ /* Check whether we have to emit a second instruction by checking
+ whether any of the upper 32 bits of the original DImode value are set. */
+ if (val == val2)
+ return 1;
+
+ i = (val >> 48) ? 48 : 32;
+
+ if (generate)
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+
+ return 2;
+ }
+
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -3088,7 +3070,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
reg = gen_rtx_REG (mode, regno);
mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
plus_constant (Pmode, base_rtx, -adjustment));
- mem = gen_rtx_MEM (mode, mem);
+ mem = gen_frame_mem (mode, mem);
insn = emit_move_insn (mem, reg);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -3176,7 +3158,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
{
rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
- emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
+ emit_move_insn (reg1, gen_frame_mem (mode, mem));
}
else
{
@@ -3252,8 +3234,6 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
unsigned start, unsigned limit, bool skip_wb)
{
rtx_insn *insn;
- rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
- ? gen_frame_mem : gen_rtx_MEM);
unsigned regno;
unsigned regno2;
@@ -3274,8 +3254,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
reg = gen_rtx_REG (mode, regno);
offset = start_offset + cfun->machine->frame.reg_offset[regno];
- mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
- offset));
+ mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+ offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
@@ -3289,8 +3269,8 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
rtx mem2;
offset = start_offset + cfun->machine->frame.reg_offset[regno2];
- mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
- offset));
+ mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+ offset));
insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
reg2));
@@ -3319,8 +3299,6 @@ aarch64_restore_callee_saves (machine_mode mode,
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
rtx base_rtx = stack_pointer_rtx;
- rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
- ? gen_frame_mem : gen_rtx_MEM);
unsigned regno;
unsigned regno2;
HOST_WIDE_INT offset;
@@ -3341,7 +3319,7 @@ aarch64_restore_callee_saves (machine_mode mode,
reg = gen_rtx_REG (mode, regno);
offset = start_offset + cfun->machine->frame.reg_offset[regno];
- mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+ mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
@@ -3354,7 +3332,7 @@ aarch64_restore_callee_saves (machine_mode mode,
rtx mem2;
offset = start_offset + cfun->machine->frame.reg_offset[regno2];
- mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+ mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
@@ -4723,6 +4701,74 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
return true;
}
+/* Return the binary representation of floating point constant VALUE in INTVAL.
+ If the value cannot be converted, return false without setting INTVAL.
+ The conversion is done in the given MODE. */
+bool
+aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
+{
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (value))
+ {
+ *intval = 0;
+ return true;
+ }
+
+ machine_mode mode = GET_MODE (value);
+ if (GET_CODE (value) != CONST_DOUBLE
+ || !SCALAR_FLOAT_MODE_P (mode)
+ || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
+ /* Only support up to DF mode. */
+ || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
+ return false;
+
+ unsigned HOST_WIDE_INT ival = 0;
+
+ long res[2];
+ real_to_target (res,
+ CONST_DOUBLE_REAL_VALUE (value),
+ REAL_MODE_FORMAT (mode));
+
+ if (mode == DFmode)
+ {
+ int order = BYTES_BIG_ENDIAN ? 1 : 0;
+ ival = zext_hwi (res[order], 32);
+ ival |= (zext_hwi (res[1 - order], 32) << 32);
+ }
+ else
+ ival = zext_hwi (res[0], 32);
+
+ *intval = ival;
+ return true;
+}
+
+/* Return TRUE if rtx X is an immediate constant that can be moved using a
+ single MOV(+MOVK) followed by an FMOV. */
+bool
+aarch64_float_const_rtx_p (rtx x)
+{
+ machine_mode mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ return false;
+
+ /* Determine whether it's cheaper to write float constants as
+ mov/movk pairs over ldr/adrp pairs. */
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode)
+ && aarch64_reinterpret_float_as_int (x, &ival))
+ {
+ machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
+ int num_instr = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ return num_instr < 3;
+ }
+
+ return false;
+}
+
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
@@ -4735,6 +4781,49 @@ aarch64_float_const_zero_rtx_p (rtx x)
return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
}
+/* Return TRUE if rtx X is an immediate constant that fits in a single
+ MOVI immediate operation. */
+bool
+aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
+{
+ if (!TARGET_SIMD)
+ return false;
+
+ machine_mode vmode, imode;
+ unsigned HOST_WIDE_INT ival;
+
+ if (GET_CODE (x) == CONST_DOUBLE
+ && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (!aarch64_reinterpret_float_as_int (x, &ival))
+ return false;
+
+ /* We make a general exception for 0. */
+ if (aarch64_float_const_zero_rtx_p (x))
+ return true;
+
+ imode = int_mode_for_mode (mode);
+ }
+ else if (GET_CODE (x) == CONST_INT
+ && SCALAR_INT_MODE_P (mode))
+ {
+ imode = mode;
+ ival = INTVAL (x);
+ }
+ else
+ return false;
+
+ /* Use a 64-bit mode for everything except DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
+
+ vmode = aarch64_simd_container_mode (imode, width);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
+
+ return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
+}
+
+
/* Return the fixed registers used for condition codes. */
static bool
@@ -5929,12 +6018,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
return NO_REGS;
}
- /* If it's an integer immediate that MOVI can't handle, then
- FP_REGS is not an option, so we return NO_REGS instead. */
- if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
- && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
- return NO_REGS;
-
/* Register elimination can result in a request for
SP+constant->FP_REGS. We cannot support such operations which
use SP as source and an FP_REG as destination, so reject out
@@ -6884,6 +6967,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
return true;
case CONST_DOUBLE:
+
+ /* First determine the number of instructions needed to do the move
+ as an integer constant. */
+ if (!aarch64_float_const_representable_p (x)
+ && !aarch64_can_const_movi_rtx_p (x, mode)
+ && aarch64_float_const_rtx_p (x))
+ {
+ unsigned HOST_WIDE_INT ival;
+ bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
+ gcc_assert (succeed);
+
+ machine_mode imode = mode == HFmode ? SImode
+ : int_mode_for_mode (mode);
+ int ncost = aarch64_internal_mov_immediate
+ (NULL_RTX, gen_int_mode (ival, imode), false, imode);
+ *cost += COSTS_N_INSNS (ncost);
+ return true;
+ }
+
if (speed)
{
/* mov[df,sf]_aarch64. */
@@ -10193,7 +10295,7 @@ aarch64_classify_symbol (rtx x, rtx offset)
/* This is alright even in PIC code as the constant
pool reference is always PC relative and within
the same translation unit. */
- if (CONSTANT_POOL_ADDRESS_P (x))
+ if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
return SYMBOL_SMALL_ABSOLUTE;
else
return SYMBOL_FORCE_TO_MEM;
@@ -10228,18 +10330,16 @@ aarch64_legitimate_pic_operand_p (rtx x)
/* Return true if X holds either a quarter-precision or
floating-point +0.0 constant. */
static bool
-aarch64_valid_floating_const (machine_mode mode, rtx x)
+aarch64_valid_floating_const (rtx x)
{
if (!CONST_DOUBLE_P (x))
return false;
- if (aarch64_float_const_zero_rtx_p (x))
+ /* This call determines which constants can be used in mov<mode>
+ as integer moves instead of constant loads. */
+ if (aarch64_float_const_rtx_p (x))
return true;
- /* We only handle moving 0.0 to a TFmode register. */
- if (!(mode == SFmode || mode == DFmode))
- return false;
-
return aarch64_float_const_representable_p (x);
}
@@ -10251,11 +10351,15 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
return false;
- /* This could probably go away because
- we now decompose CONST_INTs according to expand_mov_immediate. */
+ /* For these cases we never want to use a literal load.
+ As such we have to prevent the compiler from forcing these
+ to memory. */
if ((GET_CODE (x) == CONST_VECTOR
&& aarch64_simd_valid_immediate (x, mode, false, NULL))
- || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
+ || CONST_INT_P (x)
+ || aarch64_valid_floating_const (x)
+ || aarch64_can_const_movi_rtx_p (x, mode)
+ || aarch64_float_const_rtx_p (x))
return !targetm.cannot_force_const_mem (mode, x);
if (GET_CODE (x) == HIGH
@@ -11538,23 +11642,6 @@ aarch64_mask_from_zextract_ops (rtx width, rtx pos)
}
bool
-aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
-{
- HOST_WIDE_INT imm = INTVAL (x);
- int i;
-
- for (i = 0; i < 8; i++)
- {
- unsigned int byte = imm & 0xff;
- if (byte != 0xff && byte != 0)
- return false;
- imm >>= 8;
- }
-
- return true;
-}
-
-bool
aarch64_mov_operand_p (rtx x, machine_mode mode)
{
if (GET_CODE (x) == HIGH
@@ -12945,15 +13032,28 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
}
char*
-aarch64_output_scalar_simd_mov_immediate (rtx immediate,
- machine_mode mode)
+aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode)
{
+
+ /* If a floating-point number was passed and we want to use it in an
+ integer mode, do the conversion to integer. */
+ if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (immediate, &ival))
+ gcc_unreachable ();
+ immediate = gen_int_mode (ival, mode);
+ }
+
machine_mode vmode;
+ /* Use a 64-bit mode for everything except DI/DF mode, where we use
+ a 128-bit vector mode. */
+ int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
gcc_assert (!VECTOR_MODE_P (mode));
- vmode = aarch64_simd_container_mode (mode, 64);
+ vmode = aarch64_simd_container_mode (mode, width);
rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
- return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+ return aarch64_output_simd_mov_immediate (v_op, vmode, width);
}
/* Split operands into moves from op[1] + op[2] into op[0]. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 106cf3a5666..7f91edb5713 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -144,7 +144,8 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
/* ARMv8.1-A architecture extensions. */
#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
-#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */
+#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */
+#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
/* ARMv8.2-A architecture extensions. */
#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
@@ -161,7 +162,8 @@ extern unsigned aarch64_architecture_version;
/* Architecture flags that affect instruction selection. */
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
#define AARCH64_FL_FOR_ARCH8_1 \
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
+ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
+ | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
#define AARCH64_FL_FOR_ARCH8_2 \
(AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
#define AARCH64_FL_FOR_ARCH8_3 \
@@ -174,7 +176,7 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
-#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
+#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f876a2b7208..64b60a903ed 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -181,6 +181,11 @@
;; will be disabled when !TARGET_FLOAT.
(define_attr "fp" "no,yes" (const_string "no"))
+;; Attribute that specifies whether or not the instruction touches half
+;; precision fp registers. When this is set to yes for an alternative,
+;; that alternative will be disabled when !TARGET_FP_F16INST.
+(define_attr "fp16" "no,yes" (const_string "no"))
+
;; Attribute that specifies whether or not the instruction touches simd
;; registers. When this is set to yes for an alternative, that alternative
;; will be disabled when !TARGET_SIMD.
@@ -194,11 +199,14 @@
;; registers when -mgeneral-regs-only is specified.
(define_attr "enabled" "no,yes"
(cond [(ior
- (and (eq_attr "fp" "yes")
- (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
- (and (eq_attr "simd" "yes")
- (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
- (const_string "no")
+ (ior
+ (and (eq_attr "fp" "yes")
+ (eq (symbol_ref "TARGET_FLOAT") (const_int 0)))
+ (and (eq_attr "simd" "yes")
+ (eq (symbol_ref "TARGET_SIMD") (const_int 0))))
+ (and (eq_attr "fp16" "yes")
+ (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0))))
+ (const_string "no")
] (const_string "yes")))
;; Attribute that specifies whether we are dealing with a branch to a
@@ -223,6 +231,7 @@
(include "../arm/cortex-a53.md")
(include "../arm/cortex-a57.md")
(include "../arm/exynos-m1.md")
+(include "falkor.md")
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
@@ -920,8 +929,8 @@
)
(define_insn_and_split "*movsi_aarch64"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w,r,*w")
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w,w")
+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
@@ -938,8 +947,9 @@
adrp\\t%x0, %A1
fmov\\t%s0, %w1
fmov\\t%w0, %s1
- fmov\\t%s0, %s1"
- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
+ fmov\\t%s0, %s1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
"{
@@ -947,13 +957,14 @@
DONE;
}"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
- adr,adr,f_mcr,f_mrc,fmov")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+ adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,*w,m, m,r,r, *w,r,*w,w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m, m,rZ,*w,Usa,Ush,rZ,w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@@ -961,6 +972,7 @@
mov\\t%0, %x1
mov\\t%x0, %1
mov\\t%x0, %1
+ mov\\t%w0, %1
#
ldr\\t%x0, %1
ldr\\t%d0, %1
@@ -971,7 +983,7 @@
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
- movi\\t%d0, %1"
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
"(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
@@ -979,10 +991,10 @@
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
- adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load1,\
+ load1,store1,store1,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+ (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
@@ -1062,28 +1074,31 @@
)
(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], HFmode)
|| aarch64_reg_or_fp_zero (operands[1], HFmode))"
"@
movi\\t%0.4h, #0
- mov\\t%0.h[0], %w1
+ fmov\\t%h0, %w1
umov\\t%w0, %1.h[0]
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%h0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
+ [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
+ neon_move,f_loads,f_stores,load1,store1,mov_reg")
+ (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")
+ (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
|| aarch64_reg_or_fp_zero (operands[1], SFmode))"
"@
@@ -1092,19 +1107,22 @@
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
- mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
- f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%w0, %w1
+ mov\\t%w0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
+ f_loads,f_stores,load1,store1,mov_reg,\
+ fconsts")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
)
(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r")
- (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
|| aarch64_reg_or_fp_zero (operands[1], DFmode))"
"@
@@ -1113,14 +1131,37 @@
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
- mov\\t%x0, %x1"
- [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
- f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ mov\\t%x0, %x1
+ mov\\t%x0, %1"
+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
+ f_loadd,f_stored,load1,store1,mov_reg,\
+ fconstd")
+ (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")]
+)
+
+(define_split
+ [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
+ (match_operand:GPF_HF 1 "general_operand"))]
+ "can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1])"
+ [(const_int 0)]
+ {
+ unsigned HOST_WIDE_INT ival;
+ if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
+ FAIL;
+
+ rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
+ emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
)
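+;; For example, DFmode 2^32 (bit pattern 0x41f0000000000000) is neither
+;; FMOV-representable nor MOVI-encodable, but its bit pattern needs only a
+;; single MOVZ, so the split emits a GP-register immediate move followed by
+;; an FMOV into the FP register instead of a literal-pool load.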
(define_insn "*movtf_aarch64"
@@ -3835,6 +3876,22 @@
[(set_attr "type" "logics_reg,logics_imm")]
)
+(define_split
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (match_operand:GPI 0 "register_operand")
+ (match_operand:GPI 1 "aarch64_mov_imm_operand"))
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "register_operand"))]
+ ""
+ [(set (match_dup 2) (match_dup 1))
+ (set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (match_dup 0)
+ (match_dup 2))
+ (const_int 0)))]
+)
+
(define_insn "*and<mode>3nr_compare0_zextract"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
@@ -3870,6 +3927,26 @@
[(set_attr "type" "logics_shift_imm")]
)
+(define_split
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (SHIFT:GPI
+ (match_operand:GPI 0 "register_operand")
+ (match_operand:QI 1 "aarch64_shift_imm_<mode>"))
+ (match_operand:GPI 2 "aarch64_mov_imm_operand"))
+ (const_int 0)))
+ (clobber (match_operand:SI 3 "register_operand"))]
+ ""
+ [(set (match_dup 3) (match_dup 2))
+ (set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:GPI (SHIFT:GPI
+ (match_dup 0)
+ (match_dup 1))
+ (match_dup 3))
+ (const_int 0)))]
+)
+
;; -------------------------------------------------------------------
;; Shifts
;; -------------------------------------------------------------------
@@ -5102,6 +5179,42 @@
}
)
+;; For xorsign (x, y), we want to generate:
+;;
+;; LDR d2, #1<<63
+;; AND v3.8B, v1.8B, v2.8B
+;; EOR v0.8B, v0.8B, v3.8B
+;;
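+;; Only the sign bit of operand 2 is used: for example, xorsign (3.0, -5.0)
+;; is -3.0 and xorsign (-3.0, -5.0) is 3.0, the same result as
+;; x * copysign (1.0, y) but without the multiply.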
+
+(define_expand "xorsign<mode>3"
+ [(match_operand:GPF 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")
+ (match_operand:GPF 2 "register_operand")]
+ "TARGET_FLOAT && TARGET_SIMD"
+{
+
+ machine_mode imode = <V_cmp_result>mode;
+ rtx mask = gen_reg_rtx (imode);
+ rtx op1x = gen_reg_rtx (imode);
+ rtx op2x = gen_reg_rtx (imode);
+
+ int bits = GET_MODE_BITSIZE (<MODE>mode) - 1;
+ emit_move_insn (mask, GEN_INT (trunc_int_for_mode (HOST_WIDE_INT_M1U << bits,
+ imode)));
+
+ emit_insn (gen_and<v_cmp_result>3 (op2x, mask,
+ lowpart_subreg (imode, operands[2],
+ <MODE>mode)));
+ emit_insn (gen_xor<v_cmp_result>3 (op1x,
+ lowpart_subreg (imode, operands[1],
+ <MODE>mode),
+ op2x));
+ emit_move_insn (operands[0],
+ lowpart_subreg (<MODE>mode, op1x, imode));
+ DONE;
+}
+)
+
;; -------------------------------------------------------------------
;; Reload support
;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0753da32f59..d7b30b0e5ee 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -12162,7 +12162,7 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
/* ARMv8.1-A intrinsics. */
#pragma GCC push_options
-#pragma GCC target ("arch=armv8.1-a")
+#pragma GCC target ("+nothing+rdma")
__extension__ extern __inline int16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 88e840f2898..9ce3d4efaf3 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -176,6 +176,12 @@
(and (match_code "const_double")
(match_test "aarch64_float_const_representable_p (op)")))
+(define_constraint "Uvi"
+ "A floating point constant which can be used with a\
+ MOVI immediate operation."
+ (and (match_code "const_double")
+ (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
+
(define_constraint "Dn"
"@internal
A constraint that matches vector of immediates."
@@ -220,9 +226,17 @@
(define_constraint "Dd"
"@internal
- A constraint that matches an immediate operand valid for AdvSIMD scalar."
+ A constraint that matches an integer immediate operand valid\
+ for AdvSIMD scalar operations in DImode."
+ (and (match_code "const_int")
+ (match_test "aarch64_can_const_movi_rtx_p (op, DImode)")))
+
+(define_constraint "Ds"
+ "@internal
+ A constraint that matches an integer immediate operand valid\
+ for AdvSIMD scalar operations in SImode."
(and (match_code "const_int")
- (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))")))
+ (match_test "aarch64_can_const_movi_rtx_p (op, SImode)")))
(define_address_constraint "Dp"
"@internal
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c
index 6d90acdd4a2..fa8c56aab02 100644
--- a/gcc/config/aarch64/cortex-a57-fma-steering.c
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.c
@@ -973,10 +973,17 @@ func_fma_steering::analyze ()
break;
}
- /* We didn't find a chain with a def for this instruction. */
- gcc_assert (i < dest_op_info->n_chains);
-
- this->analyze_fma_fmul_insn (forest, chain, head);
+ /* Due to the implementation of regrename, the destination register can
+ slip away from regrename's analysis. As a result, there is no chain
+ for the destination register of the insn. We simply skip the insn,
+ even if it is an fmul/fmac instruction. This can happen when the
+ destination register is also a source register of the insn and one of
+ the conditions below is satisfied:
+ 1) the source reg is set up in a larger mode than this insn;
+ 2) the source reg is uninitialized;
+ 3) the source reg is passed in as a parameter. */
+ if (i < dest_op_info->n_chains)
+ this->analyze_fma_fmul_insn (forest, chain, head);
}
}
free (bb_dfs_preorder);
diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md
new file mode 100644
index 00000000000..b422ab30c44
--- /dev/null
+++ b/gcc/config/aarch64/falkor.md
@@ -0,0 +1,681 @@
+;; Falkor pipeline description
+;; Copyright (C) 2017 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "falkor")
+
+;; Complex int instructions (e.g. multiply and divide) execute in the X
+;; pipeline. Simple int instructions execute in the X, Y, and Z pipelines.
+
+(define_cpu_unit "falkor_x" "falkor")
+(define_cpu_unit "falkor_y" "falkor")
+(define_cpu_unit "falkor_z" "falkor")
+
+;; Branches execute in the B pipeline or in one of the int pipelines depending
+;; on how complex it is. Simple int insns (like movz) can also execute here.
+
+(define_cpu_unit "falkor_b" "falkor")
+
+;; Vector and FP insns execute in the VX and VY pipelines.
+
+(define_automaton "falkor_vfp")
+
+(define_cpu_unit "falkor_vx" "falkor_vfp")
+(define_cpu_unit "falkor_vy" "falkor_vfp")
+
+;; Loads execute in the LD pipeline.
+;; Stores execute in the ST, SD, and VSD pipelines, for address, data, and
+;; vector data.
+
+(define_automaton "falkor_mem")
+
+(define_cpu_unit "falkor_ld" "falkor_mem")
+(define_cpu_unit "falkor_st" "falkor_mem")
+(define_cpu_unit "falkor_sd" "falkor_mem")
+(define_cpu_unit "falkor_vsd" "falkor_mem")
+
+;; The GTOV and VTOG pipelines are for general to vector reg moves, and vice
+;; versa.
+
+(define_cpu_unit "falkor_gtov" "falkor")
+(define_cpu_unit "falkor_vtog" "falkor")
+
+;; Common reservation combinations.
+
+(define_reservation "falkor_vxvy" "falkor_vx|falkor_vy")
+(define_reservation "falkor_zb" "falkor_z|falkor_b")
+(define_reservation "falkor_xyz" "falkor_x|falkor_y|falkor_z")
+(define_reservation "falkor_xyzb" "falkor_x|falkor_y|falkor_z|falkor_b")
+
+;; SIMD Floating-Point Instructions
+
+(define_insn_reservation "falkor_afp_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_neg_s,neon_fp_neg_d,neon_fp_abs_s,neon_fp_abs_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_1_vxvy_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_neg_s_q,neon_fp_neg_d_q,neon_fp_abs_s_q,neon_fp_abs_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_2_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_minmax_s,neon_fp_minmax_d,neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,neon_fp_compare_s,neon_fp_compare_d,neon_fp_round_s,neon_fp_round_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_2_vxvy_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q,neon_fp_compare_s_q,neon_fp_compare_d_q,neon_fp_round_s_q,neon_fp_round_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_3_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,neon_fp_abd_s,neon_fp_abd_d,neon_fp_addsub_s,neon_fp_addsub_d,neon_fp_reduc_add_s,neon_fp_reduc_add_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_3_vxvy_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_abd_s_q,neon_fp_abd_d_q,neon_fp_addsub_s_q,neon_fp_addsub_d_q,neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_to_int_s,neon_fp_to_int_d,neon_int_to_fp_s,neon_int_to_fp_d,neon_fp_cvt_widen_h,neon_fp_cvt_widen_s"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_to_int_s_q,neon_fp_to_int_d_q,neon_int_to_fp_s_q,neon_int_to_fp_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_mul" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_s_scalar"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_mla" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_scalar"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mul" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mul_s_q,neon_fp_mul_s_scalar_q"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_5_vxvy_vxvy_mla" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mla_s_q,neon_fp_mla_s_scalar_q"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_mul" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mul_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_mla" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mla_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mul" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mul_d_q,neon_fp_mul_d_scalar_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vxvy_vxvy_mla" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_mla_d_q,neon_fp_mla_d_scalar_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_4_vxvy_vxvy_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q"))
+ "falkor_vxvy+falkor_vxvy,falkor_vxvy")
+
+(define_insn_reservation "falkor_afp_6_vx_vy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_div_s"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_11_vx_vy" 11
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_div_d"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_6_vx_vy_vx_vy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_div_s_q"))
+ "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_11_vx_vy_vx_vy" 11
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_div_d_q"))
+ "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_12_vx_vy" 12
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_sqrt_s"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_22_vx_vy" 22
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_sqrt_d"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_afp_12_vx_vy_vx_vy" 12
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_sqrt_s_q"))
+ "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+(define_insn_reservation "falkor_afp_22_vx_vy_vx_vy" 22
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_sqrt_d_q"))
+ "(falkor_vx+falkor_vy),(falkor_vx+falkor_vy)")
+
+;; SIMD Integer Instructions
+
+(define_insn_reservation "falkor_ai_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_add,neon_reduc_add,neon_logic,neon_neg,neon_sub"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_1_vxvy_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_shift_imm_long,neon_add_q,neon_reduc_add_q,neon_logic_q,neon_neg_q,neon_sub_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_2_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_add_long,neon_sub_long,neon_add_halve,neon_sub_halve,neon_shift_imm,neon_shift_reg,neon_minmax,neon_abs,neon_compare,neon_compare_zero,neon_tst"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_2_vxvy_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_add_halve_q,neon_sub_halve_q,neon_shift_imm_q,neon_shift_reg_q,neon_minmax_q,neon_abs_q,neon_compare_q,neon_compare_zero_q,neon_tst_q,neon_reduc_add_long"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_3_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_shift_acc,neon_reduc_add_acc,neon_abd,neon_qadd,neon_qsub,neon_qabs,neon_qneg,neon_sat_shift_imm,neon_sat_shift_imm_narrow_q,neon_sat_shift_reg,neon_reduc_minmax"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_reduc_minmax_q"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_3_vxvy_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_shift_acc_q,neon_reduc_add_acc_q,neon_abd_q,neon_abd_long,neon_qadd_q,neon_qsub_q,neon_qabs_q,neon_qneg_q,neon_sat_shift_imm_q,neon_sat_shift_reg_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_mul" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_mul_b,neon_mul_h,neon_mul_s,neon_mul_h_scalar,neon_mul_s_scalar,neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,neon_sat_mul_h_scalar,neon_sat_mul_s_scalar"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_mla" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_mla_b,neon_mla_h,neon_mla_s,neon_mla_h_scalar,neon_mla_s_scalar"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mul" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,neon_mul_h_scalar_q,neon_mul_s_scalar_q,neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,neon_mul_d_long,neon_mul_h_scalar_long,neon_mul_s_scalar_long,neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy_mla" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,neon_mla_h_scalar_q,neon_mla_s_scalar_q,neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,neon_mla_h_scalar_long,neon_mla_s_scalar_long,neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_ai_4_vxvy_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_add_halve_narrow_q,neon_sub_halve_narrow_q,neon_arith_acc"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_2_ai_vxvy_vxvy_vxvy_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_add_widen,neon_sub_widen"))
+ "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+(define_insn_reservation "falkor_4_ai_vxvy_vxvy_vxvy_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_arith_acc_q"))
+ "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+;; SIMD Load Instructions
+
+(define_insn_reservation "falkor_ald_4_ld" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,neon_load1_all_lanes,neon_load2_one_lane"))
+ "falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_none" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_2reg,neon_load2_2reg,neon_load2_all_lanes"))
+ "falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_2reg_q,neon_load2_2reg_q,neon_load2_all_lanes_q,neon_load3_one_lane,neon_load4_one_lane,neon_ldp,neon_ldp_q"))
+ "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_none" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_3reg,neon_load3_3reg,neon_load3_all_lanes"))
+ "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_ld" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_3reg_q,neon_load3_3reg_q,neon_load3_all_lanes_q"))
+ "falkor_ld,falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_none_none" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_4reg,neon_load4_4reg"))
+ "falkor_ld,falkor_ld")
+
+(define_insn_reservation "falkor_ald_4_ld_ld_ld_ld" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_load1_4reg_q,neon_load4_4reg_q,neon_load4_all_lanes,neon_load4_all_lanes_q"))
+ "falkor_ld,falkor_ld,falkor_ld,falkor_ld")
+
+;; Arithmetic and Logical Instructions
+
+(define_insn_reservation "falkor_alu_1_xyz" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "alus_sreg,alus_imm,alus_shift_imm,csel,adc_reg,alu_imm,alu_sreg,alu_shift_imm,alu_ext,alus_ext,logic_imm,logic_reg,logic_shift_imm,logics_imm,logics_reg,logics_shift_imm,mov_reg"))
+ "falkor_xyz")
+
+;; SIMD Miscellaneous Instructions
+
+;; No separate type for ins and dup. But this is correct for both.
+
+(define_insn_reservation "falkor_am_3_gtov" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_from_gp"))
+ "falkor_gtov")
+
+;; No separate type for ins and dup. Assuming dup is more common; ins is
+;; gtov+vxvy with a latency of 4.
+
+(define_insn_reservation "falkor_am_3_gtov_gtov" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_from_gp_q"))
+ "falkor_gtov,falkor_gtov")
+
+;; neon_to_gp_q is used for 32-bit ARM instructions that move 64 bits of
+;; data, so it is not needed here.
+
+(define_insn_reservation "falkor_am_3_vtog" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_to_gp"))
+ "falkor_vtog")
+
+(define_insn_reservation "falkor_am_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_bsl,neon_dup,neon_ext,neon_ins,neon_ins_q,neon_move,neon_rev,neon_tbl1,neon_permute,neon_shift_imm_narrow_q"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_1_vxvy_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_bsl_q,neon_dup_q,neon_ext_q,neon_move_q,neon_rev_q,neon_tbl1_q,neon_permute_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_2_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_cls,neon_cnt,neon_rbit"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_4_vxvy_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_cls_q,neon_cnt_q,neon_rbit_q,neon_tbl2"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_3_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recpe_s,neon_fp_recpe_d,neon_fp_rsqrte_s,neon_fp_rsqrte_d,neon_fp_recpx_s,neon_fp_recpx_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_3_vxvy_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recpe_s_q,neon_fp_recpe_d_q,neon_fp_rsqrte_s_q,neon_fp_rsqrte_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recps_s"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy_vxvy" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recps_s_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recps_d,neon_fp_rsqrts_d"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy_vxvy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_fp_recps_d_q,neon_fp_rsqrts_d_q"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_am_5_vxvy_vxvy_vxvy" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_tbl2_q,neon_tbl3"))
+ "(falkor_vxvy+falkor_vxvy),falkor_vxvy")
+
+(define_insn_reservation "falkor_am_6_vxvy_vxvy_vxvy_vxvy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_tbl3_q,neon_tbl4"))
+ "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy)")
+
+(define_insn_reservation "falkor_am_7_vxvy_vxvy_vxvy_vxvy_vxvy" 7
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_tbl4_q"))
+ "(falkor_vxvy+falkor_vxvy),(falkor_vxvy+falkor_vxvy),falkor_vxvy")
+
+;; SIMD Store Instructions
+
+;; ??? stp is neon_store1_2reg in aarch64.md, but neon_stp in aarch64-simd.md.
+;; Similarly with ldp.
+
+(define_insn_reservation "falkor_ast_st_vsd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,neon_store1_one_lane,neon_store1_one_lane_q,neon_store1_2reg,neon_store2_2reg,neon_store2_one_lane,neon_store2_one_lane_q,neon_stp"))
+ "falkor_st+falkor_vsd")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_store1_2reg_q,neon_store1_3reg,neon_store1_4reg,neon_store2_2reg_q,neon_store3_3reg,neon_store4_4reg,neon_store3_one_lane,neon_store3_one_lane_q,neon_store4_one_lane,neon_store4_one_lane_q,neon_stp_q"))
+ "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_store1_3reg_q,neon_store3_3reg_q"))
+ "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+(define_insn_reservation "falkor_as_0_st_vsd_st_vsd_st_vsd_st_vsd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "neon_store1_4reg_q,neon_store4_4reg_q"))
+ "(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd),(falkor_st+falkor_vsd)")
+
+;; Branch Instructions
+
+(define_insn_reservation "falkor_branch_0_zb" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "branch"))
+ "falkor_zb")
+
+(define_insn_reservation "falkor_call_0_xyzb" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "call"))
+ "falkor_xyzb")
+
+;; Cryptography Extensions
+
+(define_insn_reservation "falkor_cry_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_sha1_fast"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_2_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_aesmc"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_2_vxvy_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_sha1_xor,crypto_sha256_fast,crypto_pmull"))
+ "falkor_vxvy+falkor_vxvy")
+
+(define_insn_reservation "falkor_cry_4_vy_vx" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_sha1_slow"))
+ "falkor_vy+falkor_vx")
+
+(define_insn_reservation "falkor_cry_6_vy_vx" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_sha256_slow"))
+ "falkor_vy+falkor_vx")
+
+(define_insn_reservation "falkor_cry_3_vxvy_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crypto_aese"))
+ "falkor_vxvy+falkor_vxvy")
+
+;; FP Load Instructions
+
+(define_insn_reservation "falkor_fld_4_ld" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "falkor_ld")
+
+;; No separate FP store section; these are found in the SIMD store section.
+
+(define_insn_reservation "falkor_fld_0_st_vsd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_stores,f_stored"))
+ "falkor_st+falkor_vsd")
+
+;; FP Data Processing Instructions
+
+(define_insn_reservation "falkor_fpdt_0_vxvy" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vtog" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_cvtf2i"))
+ "falkor_vtog")
+
+(define_insn_reservation "falkor_fpdt_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "ffariths,ffarithd,fcsel"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_2_vxvy" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_minmaxd,f_minmaxs,f_rintd,f_rints"))
+ "falkor_vxvy")
+
+;; Scalar FP ABD is handled the same as vector FP ABD.
+
+(define_insn_reservation "falkor_fpdt_3_vxvy" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "faddd,fadds"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_4_vxvy" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_cvt"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vxvy_mul" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fmuls"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_5_vxvy_mla" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fmacs,ffmas"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vxvy_mul" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fmuld"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vxvy_mla" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fmacd,ffmad"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_6_vx_vy" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fdivs"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_fpdt_11_vx_vy" 11
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fdivd"))
+ "falkor_vx+falkor_vy")
+
+(define_insn_reservation "falkor_fpdt_12_vx_vy" 12
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fsqrts"))
+ "falkor_vxvy")
+
+(define_insn_reservation "falkor_fpdt_22_vx_vy" 22
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fsqrtd"))
+ "falkor_vxvy")
+
+;; FP Miscellaneous Instructions
+
+(define_insn_reservation "falkor_fpmsc_3_vtog" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_mrc"))
+ "falkor_vtog")
+
+(define_insn_reservation "falkor_fpmsc_3_gtov" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_mcr"))
+ "falkor_gtov")
+
+(define_insn_reservation "falkor_fpmsc_1_vxvy" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "fmov,fconsts,fconstd"))
+ "falkor_vxvy")
+
+;; There is no separate type for float-to-fixed conversions; they use the same
+;; type as float-to-int conversions and schedule identically, so no problem.
+
+(define_insn_reservation "falkor_fpmsc_6_gtov" 6
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "f_cvti2f"))
+ "falkor_gtov")
+
+;; Load Instructions
+
+(define_insn_reservation "falkor_ld_3_ld" 3
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "load1,load2"))
+ "falkor_ld")
+
+;; Miscellaneous Data-Processing Instructions
+
+(define_insn_reservation "falkor_misc_1_xyz" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "bfx,bfm,extend,rotate_imm,shift_imm"))
+ "falkor_xyz")
+
+(define_insn_reservation "falkor_misc_2_x" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "crc"))
+ "falkor_x")
+
+(define_insn_reservation "falkor_misc_2_xyz" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "clz,rbit,rev"))
+ "falkor_xyz")
+
+;; Divide and Multiply Instructions
+
+(define_insn_reservation "falkor_muldiv_4_x_mul" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "mul"))
+ "falkor_x")
+
+(define_insn_reservation "falkor_muldiv_4_x_mla" 4
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "mla,smlal,umlal"))
+ "falkor_x")
+
+(define_insn_reservation "falkor_muldiv_5_x_mul" 5
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "smull,umull"))
+ "falkor_x")
+
+(define_insn_reservation "falkor_md_11_x_z" 11
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "sdiv,udiv"))
+ "falkor_x+falkor_z")
+
+;; Move and Shift Instructions
+
+(define_insn_reservation "falkor_mvs_1_xyz" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "mov_imm,shift_reg"))
+ "falkor_xyz")
+
+(define_insn_reservation "falkor_mvs_1_xyzb" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "adr"))
+ "falkor_xyzb")
+
+;; Other Instructions
+
+;; Block is for instruction scheduling blockage insns in RTL. There are no
+;; hardware instructions emitted for them, so don't use any resources.
+
+(define_insn_reservation "falkor_other_0_nothing" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "no_insn,trap,block"))
+ "nothing")
+
+(define_insn_reservation "falkor_other_2_z" 2
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "mrs"))
+ "falkor_z")
+
+;; Assume instructions of type "multiple" use all pipes.
+
+(define_insn_reservation "falkor_extra" 1
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "multiple"))
+ "falkor_x+falkor_y+falkor_z+falkor_b+falkor_vx+falkor_vy+falkor_ld+falkor_st+falkor_sd+falkor_vsd+falkor_gtov+falkor_vtog")
+
+;; Store Instructions
+
+;; No use of store_rel, store3, or store4 in aarch64.
+
+(define_insn_reservation "falkor_st_0_st_sd" 0
+ (and (eq_attr "tune" "falkor")
+ (eq_attr "type" "store1,store2"))
+ "falkor_st+falkor_sd")
+
+;; Multiply bypasses.
+
+;; 1 cycle latency (0 bubble) for an integer mul or mac feeding into a mac.
+
+(define_bypass 1
+ "falkor_ai_4_vxvy_mul,falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mul,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mul,falkor_muldiv_4_x_mla,falkor_muldiv_5_x_mul"
+ "falkor_ai_4_vxvy_mla,falkor_ai_4_vxvy_vxvy_mla,falkor_muldiv_4_x_mla")
+
+;; 3 cycle latency (2 bubbles) for an FP mul or mac feeding into a mac.
+
+(define_bypass 3
+ "falkor_afp_5_vxvy_mul,falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mul,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mul,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mul,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mul,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mul,falkor_fpdt_6_vxvy_mla"
+ "falkor_afp_5_vxvy_mla,falkor_afp_5_vxvy_vxvy_mla,falkor_afp_6_vxvy_mla,falkor_afp_6_vxvy_vxvy_mla,falkor_fpdt_5_vxvy_mla,falkor_fpdt_6_vxvy_mla")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 43be7fd3611..cceb57525c7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -44,6 +44,9 @@
;; Iterator for all scalar floating point modes (HF, SF, DF)
(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF])
+;; Iterator for all scalar floating point modes (HF, SF, DF), HF unconditional
+(define_mode_iterator GPF_HF [HF SF DF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
@@ -520,6 +523,17 @@
(SI "SI") (HI "HI")
(QI "QI")])
+;; Define element mode for each vector mode (lower case).
+(define_mode_attr Vel [(V8QI "qi") (V16QI "qi")
+ (V4HI "hi") (V8HI "hi")
+ (V2SI "si") (V4SI "si")
+ (DI "di") (V2DI "di")
+ (V4HF "hf") (V8HF "hf")
+ (V2SF "sf") (V4SF "sf")
+ (V2DF "df") (DF "df")
+ (SI "si") (HI "hi")
+ (QI "qi")])
+
;; 64-bit container modes the inner or scalar source mode.
(define_mode_attr VCOND [(HI "V4HI") (SI "V2SI")
(V4HI "V4HI") (V8HI "V4HI")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index ad8a43c2b2c..11243c4ce00 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -114,6 +114,10 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_logical_immediate")))
+(define_predicate "aarch64_mov_imm_operand"
+ (and (match_code "const_int")
+ (match_test "aarch64_move_imm (INTVAL (op), mode)")))
+
(define_predicate "aarch64_logical_and_immediate"
(and (match_code "const_int")
(match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
diff --git a/gcc/config/aarch64/rtems.h b/gcc/config/aarch64/rtems.h
index b48e28afda0..07c5679d5c1 100644
--- a/gcc/config/aarch64/rtems.h
+++ b/gcc/config/aarch64/rtems.h
@@ -1,20 +1,25 @@
/* Definitions for RTEMS based AARCH64 system.
Copyright (C) 2016-2017 Free Software Foundation, Inc.
-
+
This file is part of GCC.
-
+
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
-
+
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
-
- You should have received a copy of the GNU General Public License
- along with GCC; see the file COPYING3. If not see
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#define HAS_INIT_SECTION