diff options
Diffstat (limited to 'gcc/config/arm/arm.c')
-rw-r--r-- | gcc/config/arm/arm.c | 1061 |
1 file changed, 976 insertions, 85 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 488991ebefa..0880b58a0cc 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -452,6 +452,8 @@ static int thumb_call_reg_needed; #define FL_NOTM (1 << 17) /* Instructions not present in the 'M' profile. */ #define FL_DIV (1 << 18) /* Hardware divde. */ +#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ +#define FL_NEON (1 << 20) /* Neon instructions. */ #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ @@ -695,7 +697,9 @@ static const struct fpu_desc all_fpus[] = {"fpe2", FPUTYPE_FPA_EMU2}, {"fpe3", FPUTYPE_FPA_EMU2}, {"maverick", FPUTYPE_MAVERICK}, - {"vfp", FPUTYPE_VFP} + {"vfp", FPUTYPE_VFP}, + {"vfp3", FPUTYPE_VFP3}, + {"neon", FPUTYPE_NEON} }; @@ -710,7 +714,9 @@ static const enum fputype fp_model_for_fpu[] = ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */ ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */ ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */ - ARM_FP_MODEL_VFP /* FPUTYPE_VFP */ + ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ + ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ + ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ }; @@ -2633,15 +2639,20 @@ arm_return_in_memory (tree type) { HOST_WIDE_INT size; + size = int_size_in_bytes (type); + + /* Vector values should be returned using ARM registers, not memory (unless + they're over 16 bytes, which will break since we only have four + call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE && size <= 16) + return 0; + if (!AGGREGATE_TYPE_P (type) && - (TREE_CODE (type) != VECTOR_TYPE) && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE)) /* All simple types are returned in registers. For AAPCS, complex types are treated the same as aggregates. 
*/ return 0; - size = int_size_in_bytes (type); - if (arm_abi != ARM_ABI_APCS) { /* ATPCS and later return aggregate types in memory only if they are @@ -2882,7 +2893,7 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, { int nregs = pcum->nregs; - if (arm_vector_mode_supported_p (mode)) + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) return 0; if (NUM_ARG_REGS > nregs @@ -4869,7 +4880,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) return 6; case CONST_DOUBLE: - if (arm_const_double_rtx (x)) + if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x)) return outer == SET ? 2 : -1; else if ((outer == COMPARE || outer == PLUS) && neg_const_double_rtx_ok_for_fpa (x)) @@ -5568,6 +5579,458 @@ neg_const_double_rtx_ok_for_fpa (rtx x) return 0; } + +/* VFPv3 has a fairly wide range of representable immediates, formed from + "quarter-precision" floating-point values. These can be evaluated using this + formula (with ^ for exponentiation): + + -1^s * n * 2^-r + + Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that + 16 <= n <= 31 and 0 <= r <= 7. + + These values are mapped onto an 8-bit integer ABCDEFGH s.t. + + - A (most-significant) is the sign bit. + - BCD are the exponent (encoded as r XOR 3). + - EFGH are the mantissa (encoded as n - 16). +*/ + +/* Return an integer index for a VFPv3 immediate operand X suitable for the + fconst[sd] instruction, or -1 if X isn't suitable. */ +static int +vfp3_const_double_index (rtx x) +{ + REAL_VALUE_TYPE r, m; + int sign, exponent; + unsigned HOST_WIDE_INT mantissa, mant_hi; + unsigned HOST_WIDE_INT mask; + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + + if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We can't represent these things, so detect them first. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) + return -1; + + /* Extract sign, exponent and mantissa. 
*/ + sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; + r = REAL_VALUE_ABS (r); + exponent = REAL_EXP (&r); + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 + bits for the mantissa, this may fail (low bits would be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&mantissa, &mant_hi, m); + + /* If there are bits set in the low part of the mantissa, we can't + represent this value. */ + if (mantissa != 0) + return -1; + + /* Now make it so that mantissa contains the most-significant bits, and move + the point_pos to indicate that the least-significant bits have been + discarded. */ + point_pos -= HOST_BITS_PER_WIDE_INT; + mantissa = mant_hi; + + /* We can permit four significant bits of mantissa only, plus a high bit + which is always 1. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return -1; + + /* Now we know the mantissa is in range, chop off the unneeded bits. */ + mantissa >>= point_pos - 5; + + /* The mantissa may be zero. Disallow that case. (It's possible to load the + floating-point immediate zero with Neon using an integer-zero load, but + that case is handled elsewhere.) */ + if (mantissa == 0) + return -1; + + gcc_assert (mantissa >= 16 && mantissa <= 31); + + /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where + normalised significands are in the range [1, 2). (Our mantissa is shifted + left 4 places at this point relative to normalised IEEE754 values). GCC + internally uses [0.5, 1) (see real.c), so the exponent returned from + REAL_EXP must be altered. */ + exponent = 5 - exponent; + + if (exponent < 0 || exponent > 7) + return -1; + + /* Sign, mantissa and exponent are now in the correct form to plug into the + formulae described in the comment above. 
*/ + return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); +} + +/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ +int +vfp3_const_double_rtx (rtx x) +{ + if (!TARGET_VFP3) + return 0; + + return vfp3_const_double_index (x) != -1; +} + +/* Recognize immediates which can be used in various Neon instructions. Legal + immediates are described by the following table (for VMVN variants, the + bitwise inverse of the constant shown is recognized. In either case, VMOV + is output and the correct instruction to use for a given constant is chosen + by the assembler). The constant shown is replicated across all elements of + the destination vector. + + insn elems variant constant (binary) + ---- ----- ------- ----------------- + vmov i32 0 00000000 00000000 00000000 abcdefgh + vmov i32 1 00000000 00000000 abcdefgh 00000000 + vmov i32 2 00000000 abcdefgh 00000000 00000000 + vmov i32 3 abcdefgh 00000000 00000000 00000000 + vmov i16 4 00000000 abcdefgh + vmov i16 5 abcdefgh 00000000 + vmvn i32 6 00000000 00000000 00000000 abcdefgh + vmvn i32 7 00000000 00000000 abcdefgh 00000000 + vmvn i32 8 00000000 abcdefgh 00000000 00000000 + vmvn i32 9 abcdefgh 00000000 00000000 00000000 + vmvn i16 10 00000000 abcdefgh + vmvn i16 11 abcdefgh 00000000 + vmov i32 12 00000000 00000000 abcdefgh 11111111 + vmvn i32 13 00000000 00000000 abcdefgh 11111111 + vmov i32 14 00000000 abcdefgh 11111111 11111111 + vmvn i32 15 00000000 abcdefgh 11111111 11111111 + vmov i8 16 abcdefgh + vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd + eeeeeeee ffffffff gggggggg hhhhhhhh + vmov f32 18 aBbbbbbc defgh000 00000000 00000000 + + For case 18, B = !b. Representable values are exactly those accepted by + vfp3_const_double_index, but are output as floating-point numbers rather + than indices. + + Variants 0-5 (inclusive) may also be used as immediates for the second + operand of VORR/VBIC instructions. 
+ + The INVERSE argument causes the bitwise inverse of the given operand to be + recognized instead (used for recognizing legal immediates for the VAND/VORN + pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is + *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be + output, rather than the real insns vbic/vorr). + + INVERSE makes no difference to the recognition of float vectors. + + The return value is the variant of immediate as shown in the above table, or + -1 if the given value doesn't match any of the listed patterns. +*/ +static int +neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + break; \ + } + + unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + + /* Vectors of float constants. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx el0 = CONST_VECTOR_ELT (op, 0); + REAL_VALUE_TYPE r0; + + if (!vfp3_const_double_rtx (el0)) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); + + for (i = 1; i < n_elts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + REAL_VALUE_TYPE re; + + REAL_VALUE_FROM_CONST_DOUBLE (re, elt); + + if (!REAL_VALUES_EQUAL (r0, re)) + return -1; + } + + if (modconst) + *modconst = CONST_VECTOR_ELT (op, 0); + + if (elementwidth) + *elementwidth = 0; + + return 18; + } + + /* Splat vector constant out into a byte vector. 
*/ + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (op, i); + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (GET_CODE (el) == CONST_INT) + { + elpart = INTVAL (el); + parts = 1; + } + else if (GET_CODE (el) == CONST_DOUBLE) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (GET_CODE (el) == CONST_DOUBLE) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 13, 
bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (1, 8, 16, bytes[i] == bytes[0]); + + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx]); + } + while (0); + + if (immtype == -1) + return -1; + + if (elementwidth) + *elementwidth = elsize; + + if (modconst) + { + unsigned HOST_WIDE_INT imm = 0; + + /* Un-invert bytes of recognized vector, if neccessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + else + { + unsigned HOST_WIDE_INT imm = 0; + + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + } + + return immtype; +#undef CHECK +} + +/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, + VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for + float elements), and a modified constant (whatever should be output for a + VMOV) in *MODCONST. */ + +int +neon_immediate_valid_for_move (rtx op, enum machine_mode mode, + rtx *modconst, int *elementwidth) +{ + rtx tmpconst; + int tmpwidth; + int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth); + + if (retval == -1) + return 0; + + if (modconst) + *modconst = tmpconst; + + if (elementwidth) + *elementwidth = tmpwidth; + + return 1; +} + +/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. 
If + the immediate is valid, write a constant suitable for using as an operand + to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to + *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */ + +int +neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ + rtx tmpconst; + int tmpwidth; + int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth); + + if (retval < 0 || retval > 5) + return 0; + + if (modconst) + *modconst = tmpconst; + + if (elementwidth) + *elementwidth = tmpwidth; + + return 1; +} + +/* Return a string suitable for output of Neon immediate logic operation + MNEM. */ + +char * +neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, + int inverse, int quad) +{ + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); + + gcc_assert (is_valid != 0); + + if (quad) + sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); + else + sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); + + return templ; +} + +/* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + extra calculations to do the full operation in fewer steps, I don't think. + Although all vector elements of the result but the first are ignored, we + actually calculate the same result in each of the elements. An alternative + such as initially loading a vector with zero to use as each of the second + operands would use up an additional register and take an extra instruction, + for no particular gain. 
*/ + +void +neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, + rtx (*reduc) (rtx, rtx, rtx)) +{ + enum machine_mode inner = GET_MODE_INNER (mode); + unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); + rtx tmpsum = op1; + + for (i = parts / 2; i >= 1; i /= 2) + { + rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); + emit_insn (reduc (dest, tmpsum, tmpsum)); + tmpsum = dest; + } +} + +/* Initialise a vector with non-constant elements. FIXME: We can do better + than the current implementation (building a vector on the stack and then + loading it) in many cases. See rs6000.c. */ + +void +neon_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner = GET_MODE_INNER (mode); + unsigned int i, n_elts = GET_MODE_NUNITS (mode); + rtx mem; + + gcc_assert (VECTOR_MODE_P (mode)); + + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), + XVECEXP (vals, 0, i)); + + emit_move_insn (target, mem); +} + /* Predicates for `match_operand' and `match_operator'. */ @@ -5679,6 +6142,72 @@ arm_coproc_mem_operand (rtx op, bool wb) return FALSE; } +/* Return TRUE if OP is a memory operand which we can load or store a vector + to/from. If CORE is true, we're moving from ARM registers not Neon + registers. */ +int +neon_vector_mem_operand (rtx op, bool core) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. 
*/ + if (GET_CODE (op) != MEM) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT))) + return TRUE; + + /* Match: (mem (reg)). */ + if (GET_CODE (ind) == REG) + return arm_address_register_rtx_p (ind, 0); + + /* Allow post-increment with Neon registers. */ + if (!core && GET_CODE (ind) == POST_INC) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + +#if 0 + /* FIXME: We can support this too if we use VLD1/VST1. */ + if (!core + && GET_CODE (ind) == POST_MODIFY + && arm_address_register_rtx_p (XEXP (ind, 0), 0) + && GET_CODE (XEXP (ind, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) + ind = XEXP (ind, 1); +#endif + + /* Match: + (plus (reg) + (const)). */ + if (!core + && GET_CODE (ind) == PLUS + && GET_CODE (XEXP (ind, 0)) == REG + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && GET_CODE (XEXP (ind, 1)) == CONST_INT + && INTVAL (XEXP (ind, 1)) > -1024 + && INTVAL (XEXP (ind, 1)) < 1016 + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + /* Return true if X is a register that will be eliminated later on. */ int arm_eliminable_register (rtx x) @@ -5695,6 +6224,12 @@ arm_eliminable_register (rtx x) enum reg_class vfp_secondary_reload_class (enum machine_mode mode, rtx x) { + if (TARGET_NEON + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + && neon_vector_mem_operand (x, FALSE)) + return NO_REGS; + if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode)) return NO_REGS; @@ -7767,8 +8302,8 @@ add_minipool_forward_ref (Mfix *fix) placed at the start of the pool. 
*/ if (ARM_DOUBLEWORD_ALIGN && max_mp == NULL - && fix->fix_size == 8 - && mp->fix_size != 8) + && fix->fix_size >= 8 + && mp->fix_size < 8) { max_mp = mp; max_address = mp->max_address; @@ -7948,7 +8483,7 @@ add_minipool_backward_ref (Mfix *fix) /* For now, we do not allow the insertion of 8-byte alignment requiring nodes anywhere but at the start of the pool. */ if (ARM_DOUBLEWORD_ALIGN - && fix->fix_size == 8 && mp->fix_size != 8) + && fix->fix_size >= 8 && mp->fix_size < 8) return NULL; else min_mp = mp; @@ -7969,7 +8504,7 @@ add_minipool_backward_ref (Mfix *fix) placed at the start of the pool. */ else if (ARM_DOUBLEWORD_ALIGN && min_mp == NULL - && fix->fix_size == 8 + && fix->fix_size >= 8 && mp->fix_size < 8) { min_mp = mp; @@ -8067,7 +8602,7 @@ dump_minipool (rtx scan) if (ARM_DOUBLEWORD_ALIGN) for (mp = minipool_vector_head; mp != NULL; mp = mp->next) - if (mp->refcount > 0 && mp->fix_size == 8) + if (mp->refcount > 0 && mp->fix_size >= 8) { align64 = 1; break; @@ -8122,6 +8657,12 @@ dump_minipool (rtx scan) break; #endif +#ifdef HAVE_consttable_16 + case 16: + scan = emit_insn_after (gen_consttable_16 (mp->value), scan); + break; + +#endif default: gcc_unreachable (); } @@ -8314,7 +8855,7 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, /* If an entry requires 8-byte alignment then assume all constant pools require 4 bytes of padding. Trying to do this later on a per-pool basis is awkward becuse existing pool entries have to be modified. */ - if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8) + if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8) minipool_pad = 4; if (dump_file) @@ -8730,6 +9271,17 @@ arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count) count++; } + /* FLDMX may not load more than 16 doubleword registers at a time. Split the + load into multiple parts if we have to handle more than 16 registers. + FIXME: This will increase the maximum size of the epilogue, which will + need altering elsewhere. 
*/ + if (count > 16) + { + arm_output_fldmx (stream, base, reg, 16); + arm_output_fldmx (stream, base, reg + 16, count - 16); + return; + } + fputc ('\t', stream); asm_fprintf (stream, "fldmfd%c\t%r!, {", TARGET_FLDMX ? 'x' : 'd', base); @@ -8793,6 +9345,19 @@ vfp_emit_fstmx (int base_reg, int count) count++; } + /* FSTMX may not store more than 16 doubleword registers at once. Split + larger stores into multiple parts (up to a maximum of two, in + practice). */ + if (count > 16) + { + int saved; + /* FIXME: When ARMv6 support is added to this function, the padding used + by f{ldm,stm}x may need attention here. */ + saved = vfp_emit_fstmx (base_reg + 16, count - 16); + saved += vfp_emit_fstmx (base_reg, 16); + return saved; + } + /* ??? The frame layout is implementation defined. We describe standard format 1 (equivalent to a FSTMD insn and unused pad word) for architectures pre-v6, and FSTMD insn format without the pad @@ -9279,6 +9844,85 @@ output_move_double (rtx *operands) return ""; } +/* Output a move, load or store for quad-word vectors in ARM registers. Only + handles MEMs accepted by neon_vector_mem_operand with CORE=true. */ + +const char * +output_move_quad (rtx *operands) +{ + if (REG_P (operands[0])) + { + /* Load, or reg->reg move. */ + + if (MEM_P (operands[1])) + { + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + break; + + case LABEL_REF: + case CONST: + output_asm_insn ("adr%?\t%0, %1", operands); + output_asm_insn ("ldm%(ia%)\t%0, %M0", operands); + break; + + default: + gcc_unreachable (); + } + } + else + { + rtx ops[2]; + int dest, src, i; + + gcc_assert (REG_P (operands[1])); + + dest = REGNO (operands[0]); + src = REGNO (operands[1]); + + /* This seems pretty dumb, but hopefully GCC won't try to do it + very often. 
*/ + if (dest < src) + for (i = 0; i < 4; i++) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + else + for (i = 3; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + } + else + { + int i, regno; + rtx ops[3]; + + gcc_assert (MEM_P (operands[0])); + gcc_assert (REG_P (operands[1])); + gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0])); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + break; + + default: + gcc_unreachable (); + } + } + + return ""; +} + /* Output a VFP load or store instruction. */ const char * @@ -9290,16 +9934,20 @@ output_move_vfp (rtx *operands) int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; const char *template; char buff[50]; + enum machine_mode mode; reg = operands[!load]; mem = operands[load]; + mode = GET_MODE (reg); + gcc_assert (REG_P (reg)); gcc_assert (IS_VFP_REGNUM (REGNO (reg))); - gcc_assert (GET_MODE (reg) == SFmode - || GET_MODE (reg) == DFmode - || GET_MODE (reg) == SImode - || GET_MODE (reg) == DImode); + gcc_assert (mode == SFmode + || mode == DFmode + || mode == SImode + || mode == DImode + || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); gcc_assert (MEM_P (mem)); addr = XEXP (mem, 0); @@ -9335,6 +9983,112 @@ output_move_vfp (rtx *operands) return ""; } +/* Output a Neon quad-word load or store. We could also support post-modify + forms using VLD1/VST1, but we don't do that yet. + WARNING, FIXME: The ordering of elements in memory is going to be weird in + big-endian mode at present, because we use VSTM instead of VST1, to make + it easy to make vector stores via ARM registers write values in the same + order as stores direct from Neon registers. 
For example, the byte ordering + of a quadword vector with 16-byte elements like this: + + [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first) + + will be (with lowest address first, h = most-significant byte, + l = least-significant byte of element): + + [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l, + e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l] + + When necessary, quadword registers (dN, dN+1) are moved to ARM registers from + rN in the order: + + dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) + + So that STM/LDM can be used on vectors in ARM registers, and the same memory + layout will result as if VSTM/VLDM were used. + + This memory format (in BE mode) is very likely to change in the future. */ + +const char * +output_move_neon (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int regno, load = REG_P (operands[0]); + const char *template; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + regno = REGNO (reg); + gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno) + || NEON_REGNO_OK_FOR_QUAD (regno)); + gcc_assert (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode)); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + switch (GET_CODE (addr)) + { + case POST_INC: + /* FIXME: We should be using vld1/vst1 here in BE mode? */ + template = "v%smia%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_MODIFY: + /* FIXME: Not currently enabled in neon_vector_mem_operand. */ + gcc_unreachable (); + + case PLUS: + { + rtx otherops[2]; + /* We're only using DImode here because it's a convenient size. + FIXME: This will need updating if the memory format of vectors + changes. */ + ops[0] = gen_rtx_REG (DImode, REGNO (reg)); + ops[1] = mem; + + otherops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2); + otherops[1] = adjust_address (ops[1], SImode, 8); + + if (reg_overlap_mentioned_p (ops[0], mem)) + { + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? 
"ld" : "st"); + output_asm_insn (buff, otherops); + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + else + { + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, otherops); + } + + return ""; + } + + default: + /* FIXME: See POST_INC. */ + template = "v%smia%%?\t%%m0, %%h1"; + ops[0] = mem; + ops[1] = reg; + } + + sprintf (buff, template, load ? "ld" : "st"); + output_asm_insn (buff, ops); + + return ""; +} + /* Output an ADD r, s, #n where n may be too big for one instruction. If adding zero to one register, output nothing. */ const char * @@ -9855,7 +10609,10 @@ arm_get_vfp_saved_size (void) /* Workaround ARM10 VFPr1 bug. */ if (count == 2 && !arm_arch6) count++; - saved += count * 8 + space_for_format_word; + /* Count extra padding for transfers > 16 words, which are + split into multiple parts. */ + saved += count * 8 + + ((count + 15) / 16) * space_for_format_word; } count = 0; } @@ -9866,7 +10623,7 @@ arm_get_vfp_saved_size (void) { if (count == 2 && !arm_arch6) count++; - saved += count * 8 + space_for_format_word; + saved += count * 8 + ((count + 15) / 16) * space_for_format_word; } } return saved; @@ -11591,6 +12348,11 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; + /* An integer without a preceding # sign. */ + case 'n': + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + return; + case 'B': if (GET_CODE (x) == CONST_INT) { @@ -11708,6 +12470,26 @@ arm_print_operand (FILE *stream, rtx x, int code) asm_fprintf (stream, "%r", REGNO (x) + 1); return; + case 'J': + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 
3 : 2)); + return; + + case 'K': + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3)); + return; + case 'm': asm_fprintf (stream, "%r", GET_CODE (XEXP (x, 0)) == REG @@ -11720,6 +12502,19 @@ arm_print_operand (FILE *stream, rtx x, int code) REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1); return; + /* Like 'M', but writing doubleword vector registers, for use by Neon + insns. */ + case 'h': + { + int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2; + int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2; + if (numregs == 1) + asm_fprintf (stream, "{d%d}", regno); + else + asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1); + } + return; + case 'd': /* CONST_TRUE_RTX means always -- that's the default. */ if (x == const_true_rtx) @@ -11832,13 +12627,15 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; - /* Print a VFP double precision register name. */ + /* Print a VFP/Neon double precision or quad precision register name. */ case 'P': + case 'q': { int mode = GET_MODE (x); - int num; + int is_quad = (code == 'q'); + int regno; - if (mode != DImode && mode != DFmode) + if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8)) { output_operand_lossage ("invalid operand for code '%c'", code); return; @@ -11851,14 +12648,52 @@ arm_print_operand (FILE *stream, rtx x, int code) return; } - num = REGNO(x) - FIRST_VFP_REGNUM; - if (num & 1) + regno = REGNO (x); + if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno)) + || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno))) { output_operand_lossage ("invalid operand for code '%c'", code); return; } - fprintf (stream, "d%d", num >> 1); + fprintf (stream, "%c%d", is_quad ? 'q' : 'd', + (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1)); + } + return; + + /* These two codes print the low/high doubleword register of a Neon quad + register, respectively. 
*/ + case 'e': + case 'f': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 16 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!NEON_REGNO_OK_FOR_QUAD (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1) + + (code == 'f' ? 1 : 0)); + } + return; + + /* Print a VFPv3 floating-point constant, represented as an integer + index. */ + case 'G': + { + int index = vfp3_const_double_index (x); + gcc_assert (index != -1); + fprintf (stream, "%d", index); } return; @@ -11881,7 +12716,15 @@ arm_print_operand (FILE *stream, rtx x, int code) break; case CONST_DOUBLE: - fprintf (stream, "#%s", fp_immediate_constant (x)); + if (TARGET_NEON) + { + char fpstr[20]; + real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), + sizeof (fpstr), 0, 1); + fprintf (stream, "#%s", fpstr); + } + else + fprintf (stream, "#%s", fp_immediate_constant (x)); break; default: @@ -11899,6 +12742,8 @@ arm_print_operand (FILE *stream, rtx x, int code) static bool arm_assemble_integer (rtx x, unsigned int size, int aligned_p) { + enum machine_mode mode; + if (size == UNITS_PER_WORD && aligned_p) { fputs ("\t.word\t", asm_out_file); @@ -11922,31 +12767,48 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p) return true; } - if (arm_vector_mode_supported_p (GET_MODE (x))) + mode = GET_MODE (x); + + if (arm_vector_mode_supported_p (mode)) { int i, units; + unsigned int invmask = 0, parts_per_word; gcc_assert (GET_CODE (x) == CONST_VECTOR); units = CONST_VECTOR_NUNITS (x); - - switch (GET_MODE (x)) - { - case V2SImode: size = 4; break; - case V4HImode: size = 2; break; - case V8QImode: size = 1; break; - default: - gcc_unreachable (); - } - - for (i = 0; i < units; i++) - { - rtx elt; - - elt = CONST_VECTOR_ELT (x, i); - assemble_integer - (elt, size, i == 0 ? 
BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); - } + size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + + /* For big-endian Neon vectors, we must permute the vector to the form + which, when loaded by a VLDR or VLDM instruction, will give a vector + with the elements in the right order. */ + if (TARGET_NEON && WORDS_BIG_ENDIAN) + { + parts_per_word = UNITS_PER_WORD / size; + /* FIXME: This might be wrong for 64-bit vector elements, but we don't + support those anywhere yet. */ + invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1; + } + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i ^ invmask); + assemble_integer + (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); + } + else + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + REAL_VALUE_TYPE rval; + + REAL_VALUE_FROM_CONST_DOUBLE (rval, elt); + + assemble_real + (rval, GET_MODE_INNER (mode), + i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT); + } return true; } @@ -12622,11 +13484,16 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) && IS_VFP_REGNUM (regno)) { if (mode == SFmode || mode == SImode) - return TRUE; + return VFP_REGNO_OK_FOR_SINGLE (regno); - /* DFmode values are only valid in even register pairs. 
*/ if (mode == DFmode) - return ((regno - FIRST_VFP_REGNUM) & 1) == 0; + return VFP_REGNO_OK_FOR_DOUBLE (regno); + + if (TARGET_NEON) + return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) + || (VALID_NEON_QREG_MODE (mode) + && NEON_REGNO_OK_FOR_QUAD (regno)); + return FALSE; } @@ -12689,7 +13556,12 @@ arm_regno_class (int regno) return CIRRUS_REGS; if (IS_VFP_REGNUM (regno)) - return VFP_REGS; + { + if (regno >= FIRST_VFP_REGNUM && regno <= LAST_LO_VFP_REGNUM) + return VFP_LO_REGS; + else + return VFP_HI_REGS; + } if (IS_IWMMXT_REGNUM (regno)) return IWMMXT_REGS; @@ -12871,38 +13743,38 @@ static const struct builtin_description bdesc_2arg[] = #define IWMMXT_BUILTIN2(code, builtin) \ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 }, - IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) - IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) - IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) - IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) - IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI) - IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) - IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI) - IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) - IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) - IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) - IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI) - IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) - IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI) - IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) - IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) - IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) - IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI) - IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) - IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI) - IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) - IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) - IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) - IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) - IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) - IWMMXT_BUILTIN2 (rorv2si3, WRORWI) - IWMMXT_BUILTIN2 (rordi3_di, WRORD) - IWMMXT_BUILTIN2 (rordi3, WRORDI) - 
IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) - IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) + IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH) + IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI) + IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW) + IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI) + IWMMXT_BUILTIN2 (ashldi3_di, WSLLD) + IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI) + IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH) + IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI) + IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW) + IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI) + IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD) + IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI) + IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH) + IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI) + IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW) + IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI) + IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD) + IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI) + IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH) + IWMMXT_BUILTIN2 (rorv4hi3, WRORHI) + IWMMXT_BUILTIN2 (rorv2si3_di, WRORW) + IWMMXT_BUILTIN2 (rorv2si3, WRORWI) + IWMMXT_BUILTIN2 (rordi3_di, WRORD) + IWMMXT_BUILTIN2 (rordi3, WRORDI) + IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) + IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) }; static const struct builtin_description bdesc_1arg[] = @@ -15136,6 +16008,7 @@ arm_file_start (void) } else { + int set_float_abi_attributes = 0; switch (arm_fpu_arch) { case FPUTYPE_FPA: @@ -15151,15 +16024,27 @@ arm_file_start (void) fpu_name = "maverick"; break; case FPUTYPE_VFP: - if (TARGET_HARD_FLOAT) - asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); - if (TARGET_HARD_FLOAT_ABI) - asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n"); fpu_name = "vfp"; + set_float_abi_attributes = 1; + break; + case FPUTYPE_VFP3: + fpu_name = "vfp3"; + 
set_float_abi_attributes = 1; + break; + case FPUTYPE_NEON: + fpu_name = "neon"; + set_float_abi_attributes = 1; break; default: abort(); } + if (set_float_abi_attributes) + { + if (TARGET_HARD_FLOAT) + asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); + if (TARGET_HARD_FLOAT_ABI) + asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n"); + } } asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name); @@ -16004,6 +16889,11 @@ thumb_set_return_address (rtx source, rtx scratch) bool arm_vector_mode_supported_p (enum machine_mode mode) { + /* Neon also supports V2SImode, etc. listed in the clause below. */ + if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode + || mode == V16QImode || mode == V4SFmode)) + return true; + if ((mode == V2SImode) || (mode == V4HImode) || (mode == V8QImode)) @@ -16037,6 +16927,7 @@ arm_dbx_register_number (unsigned int regno) if (IS_FPA_REGNUM (regno)) return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; + /* FIXME: VFPv3 register numbering. */ if (IS_VFP_REGNUM (regno)) return 64 + regno - FIRST_VFP_REGNUM; |