diff options
author | Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> | 2013-05-14 09:55:27 +0000 |
---|---|---|
committer | Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> | 2013-05-14 09:55:27 +0000 |
commit | b5ae17f88f50279e4573f2959d42922c0c30b1db (patch) | |
tree | 5569c9b0222f5ff5603ae258582f94936128c806 /gcc/config | |
parent | 9c29cfa31a86897535882b5042585a7eb4bf3e2b (diff) | |
parent | f9bf04177d912f484abfb9c3985d5e3b5c8e0bf0 (diff) |
Merge from branches/gcc-4_8-branch r198615.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@198871 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 34 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 47 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 480 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 9 | ||||
-rw-r--r-- | gcc/config/arm/iterators.md | 6 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 150 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 22 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 52 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 5 | ||||
-rw-r--r-- | gcc/config/pa/pa.c | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 13 | ||||
-rw-r--r-- | gcc/config/s390/s390.c | 3 | ||||
-rw-r--r-- | gcc/config/sh/sh.md | 2 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.c | 27 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.h | 2 |
16 files changed, 499 insertions, 361 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f85446b988e..37416901d9c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1794,6 +1794,7 @@ "TARGET_SIMD" { int inverse = 0; + int use_zero_form = 0; int swap_bsl_operands = 0; rtx op1 = operands[1]; rtx op2 = operands[2]; @@ -1806,12 +1807,16 @@ switch (GET_CODE (operands[3])) { case GE: + case GT: case LE: + case LT: case EQ: - if (!REG_P (operands[5]) - && (operands[5] != CONST0_RTX (<MODE>mode))) - operands[5] = force_reg (<MODE>mode, operands[5]); - break; + if (operands[5] == CONST0_RTX (<MODE>mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ default: if (!REG_P (operands[5])) operands[5] = force_reg (<MODE>mode, operands[5]); @@ -1862,7 +1867,26 @@ a GT b -> a GT b a LE b -> b GE a a LT b -> b GT a - a EQ b -> a EQ b */ + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_aarch64_cmlt<mode>; + break; + case LE: + base_comparison = gen_aarch64_cmle<mode>; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } if (!inverse) emit_insn (base_comparison (mask, operands[4], operands[5])); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1615a16b53a..9164911673c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3366,6 +3366,16 @@ aarch64_print_operand (FILE *f, rtx x, char code) asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S')); break; + case 'X': + /* Print integer constant in hex. */ + if (GET_CODE (x) != CONST_INT) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + asm_fprintf (f, "0x%x", UINTVAL (x)); + break; + case 'w': case 'x': /* Print a general register name or the zero register (32-bit or @@ -3870,14 +3880,21 @@ aarch64_can_eliminate (const int from, const int to) } else { - /* If we decided that we didn't need a frame pointer but then used - LR in the function, then we do need a frame pointer after all, so - prevent this elimination to ensure a frame pointer is used. */ + /* If we decided that we didn't need a leaf frame pointer but then used + LR in the function, then we'll want a frame pointer after all, so + prevent this elimination to ensure a frame pointer is used. + NOTE: the original value of flag_omit_frame_pointer gets trashed + IFF flag_omit_leaf_frame_pointer is true, so we check the value + of faked_omit_frame_pointer here (which is true when we always + wish to keep non-leaf frame pointers but only wish to keep leaf frame + pointers when LR is clobbered). */ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM - && df_regs_ever_live_p (LR_REGNUM)) + && df_regs_ever_live_p (LR_REGNUM) + && faked_omit_frame_pointer) return false; } + return true; } @@ -7064,12 +7081,30 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, } static void +aarch64_print_extension (void) +{ + const struct aarch64_option_extension *opt = NULL; + + for (opt = all_extensions; opt->name != NULL; opt++) + if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on) + asm_fprintf (asm_out_file, "+%s", opt->name); + + asm_fprintf (asm_out_file, "\n"); +} + +static void aarch64_start_file (void) { if (selected_arch) - asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name); + { + asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name); + aarch64_print_extension (); + } else if (selected_cpu) - asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name); + { + asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name); + aarch64_print_extension (); + } default_file_start(); } diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b4e03a7d812..222dc69cb2f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -860,8 +860,8 @@ (match_operand:GPI 2 "const_int_operand" "n"))] "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) && INTVAL (operands[1]) % 16 == 0 - && INTVAL (operands[2]) <= 0xffff" - "movk\\t%<w>0, %2, lsl %1" + && UINTVAL (operands[2]) <= 0xffff" + "movk\\t%<w>0, %X2, lsl %1" [(set_attr "v8type" "movk") (set_attr "mode" "<MODE>")] ) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 29e4d64e052..5f1ab115c52 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -7191,10 +7191,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t * a) { float32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7202,10 +7202,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) vld1_dup_f64 (const float64_t * a) { float64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7213,10 +7213,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vld1_dup_p8 (const poly8_t * a) { poly8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7224,10 +7224,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vld1_dup_p16 (const poly16_t * a) { poly16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7235,10 +7235,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_dup_s8 (const int8_t * a) { int8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7246,10 +7246,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vld1_dup_s16 (const int16_t * a) { int16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7257,10 +7257,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vld1_dup_s32 (const int32_t * a) { int32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7268,10 +7268,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_dup_s64 (const int64_t * a) { int64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7279,10 +7279,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vld1_dup_u8 (const uint8_t * a) { uint8x8_t result; - __asm__ ("ld1r {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7290,10 +7290,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vld1_dup_u16 (const uint16_t * a) { uint16x4_t result; - __asm__ ("ld1r {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7301,10 +7301,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vld1_dup_u32 (const uint32_t * a) { uint32x2_t result; - __asm__ ("ld1r {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7312,10 +7312,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vld1_dup_u64 (const uint64_t * a) { uint64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7323,10 +7323,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * a) { float32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7334,10 +7334,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) vld1_f64 (const float64_t * a) { float64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7347,9 +7347,9 @@ vld1_f64 (const float64_t * a) float32x2_t b_ = (b); \ const float32_t * a_ = (a); \ float32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7360,9 +7360,9 @@ vld1_f64 (const float64_t * a) float64x1_t b_ = (b); \ const float64_t * a_ = (a); \ float64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7373,9 +7373,9 @@ vld1_f64 (const float64_t * a) poly8x8_t b_ = (b); \ const poly8_t * a_ = (a); \ poly8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7386,9 +7386,9 @@ vld1_f64 (const float64_t * a) poly16x4_t b_ = (b); \ const poly16_t * a_ = (a); \ poly16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7399,9 +7399,9 @@ vld1_f64 (const float64_t * a) int8x8_t b_ = (b); \ const int8_t * a_ = (a); \ int8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7412,9 +7412,9 @@ vld1_f64 (const float64_t * a) int16x4_t b_ = (b); \ const int16_t * a_ = (a); \ int16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7425,9 +7425,9 @@ vld1_f64 (const float64_t * a) int32x2_t b_ = (b); \ const int32_t * a_ = (a); \ int32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7438,9 +7438,9 @@ vld1_f64 (const float64_t * a) int64x1_t b_ = (b); \ const int64_t * a_ = (a); \ int64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7451,9 +7451,9 @@ vld1_f64 (const float64_t * a) uint8x8_t b_ = (b); \ const uint8_t * a_ = (a); \ uint8x8_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7464,9 +7464,9 @@ vld1_f64 (const float64_t * a) uint16x4_t b_ = (b); \ const uint16_t * a_ = (a); \ uint16x4_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7477,9 +7477,9 @@ vld1_f64 (const float64_t * a) uint32x2_t b_ = (b); \ const uint32_t * a_ = (a); \ uint32x2_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7490,9 +7490,9 @@ vld1_f64 (const float64_t * a) uint64x1_t b_ = (b); \ const uint64_t * a_ = (a); \ uint64x1_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7501,10 +7501,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vld1_p8 (const poly8_t * a) { poly8x8_t result; - __asm__ ("ld1 {%0.8b}, [%1]" - : "=w"(result) - : "r"(a) - : /* No clobbers */); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7512,10 +7512,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vld1_p16 (const poly16_t * a) { poly16x4_t result; - __asm__ ("ld1 {%0.4h}, [%1]" - : "=w"(result) - : "r"(a) - : /* No clobbers */); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7523,10 +7523,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vld1_s8 (const int8_t * a) { int8x8_t result; - __asm__ ("ld1 {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7534,10 +7534,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vld1_s16 (const int16_t * a) { int16x4_t result; - __asm__ ("ld1 {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7545,10 +7545,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vld1_s32 (const int32_t * a) { int32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7556,10 +7556,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vld1_s64 (const int64_t * a) { int64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7567,10 +7567,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vld1_u8 (const uint8_t * a) { uint8x8_t result; - __asm__ ("ld1 {%0.8b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8b}, %1" + : "=w"(result) + : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7578,10 +7578,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vld1_u16 (const uint16_t * a) { uint16x4_t result; - __asm__ ("ld1 {%0.4h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4h}, %1" + : "=w"(result) + : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7589,10 +7589,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vld1_u32 (const uint32_t * a) { uint32x2_t result; - __asm__ ("ld1 {%0.2s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2s}, %1" + : "=w"(result) + : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7600,10 +7600,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vld1_u64 (const uint64_t * a) { uint64x1_t result; - __asm__ ("ld1 {%0.1d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7611,10 +7611,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t * a) { float32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7622,10 +7622,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vld1q_dup_f64 (const float64_t * a) { float64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7633,10 +7633,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vld1q_dup_p8 (const poly8_t * a) { poly8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7644,10 +7644,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vld1q_dup_p16 (const poly16_t * a) { poly16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7655,10 +7655,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_dup_s8 (const int8_t * a) { int8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7666,10 +7666,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vld1q_dup_s16 (const int16_t * a) { int16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7677,10 +7677,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vld1q_dup_s32 (const int32_t * a) { int32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7688,10 +7688,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_dup_s64 (const int64_t * a) { int64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7699,10 +7699,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vld1q_dup_u8 (const uint8_t * a) { uint8x16_t result; - __asm__ ("ld1r {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7710,10 +7710,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vld1q_dup_u16 (const uint16_t * a) { uint16x8_t result; - __asm__ ("ld1r {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7721,10 +7721,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vld1q_dup_u32 (const uint32_t * a) { uint32x4_t result; - __asm__ ("ld1r {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7732,10 +7732,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vld1q_dup_u64 (const uint64_t * a) { uint64x2_t result; - __asm__ ("ld1r {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); return result; } @@ -7743,10 +7743,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * a) { float32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7754,10 +7754,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vld1q_f64 (const float64_t * a) { float64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7767,9 +7767,9 @@ vld1q_f64 (const float64_t * a) float32x4_t b_ = (b); \ const float32_t * a_ = (a); \ float32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7780,9 +7780,9 @@ vld1q_f64 (const float64_t * a) float64x2_t b_ = (b); \ const float64_t * a_ = (a); \ float64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7793,9 +7793,9 @@ vld1q_f64 (const float64_t * a) poly8x16_t b_ = (b); \ const poly8_t * a_ = (a); \ poly8x16_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7806,9 +7806,9 @@ vld1q_f64 (const float64_t * a) poly16x8_t b_ = (b); \ const poly16_t * a_ = (a); \ poly16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7819,9 +7819,9 @@ vld1q_f64 (const float64_t * a) int8x16_t b_ = (b); \ const int8_t * a_ = (a); \ int8x16_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7832,9 +7832,9 @@ vld1q_f64 (const float64_t * a) int16x8_t b_ = (b); \ const int16_t * a_ = (a); \ int16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7845,9 +7845,9 @@ vld1q_f64 (const float64_t * a) int32x4_t b_ = (b); \ const int32_t * a_ = (a); \ int32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7858,9 +7858,9 @@ vld1q_f64 (const float64_t * a) int64x2_t b_ = (b); \ const int64_t * a_ = (a); \ int64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7871,9 +7871,9 @@ vld1q_f64 (const float64_t * a) uint8x16_t b_ = (b); \ const uint8_t * a_ = (a); \ uint8x16_t result; \ - __asm__ ("ld1 {%0.b}[%3],[%1]" \ + __asm__ ("ld1 {%0.b}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7884,9 +7884,9 @@ vld1q_f64 (const float64_t * a) uint16x8_t b_ = (b); \ const uint16_t * a_ = (a); \ uint16x8_t result; \ - __asm__ ("ld1 {%0.h}[%3],[%1]" \ + __asm__ ("ld1 {%0.h}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7897,9 +7897,9 @@ vld1q_f64 (const float64_t * a) uint32x4_t b_ = (b); \ const uint32_t * a_ = (a); \ uint32x4_t result; \ - __asm__ ("ld1 {%0.s}[%3],[%1]" \ + __asm__ ("ld1 {%0.s}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7910,9 +7910,9 @@ vld1q_f64 (const float64_t * a) uint64x2_t b_ = (b); \ const uint64_t * a_ = (a); \ uint64x2_t result; \ - __asm__ ("ld1 {%0.d}[%3],[%1]" \ + __asm__ ("ld1 {%0.d}[%1], %2" \ : "=w"(result) \ - : "r"(a_), "0"(b_), "i"(c) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ : /* No clobbers */); \ result; \ }) @@ -7921,10 +7921,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vld1q_p8 (const poly8_t * a) { poly8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7932,10 +7932,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vld1q_p16 (const poly16_t * a) { poly16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7943,10 +7943,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vld1q_s8 (const int8_t * a) { int8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7954,10 +7954,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vld1q_s16 (const int16_t * a) { int16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8h}, %1" + : "=w"(result) + : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7965,10 +7965,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vld1q_s32 (const int32_t * a) { int32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7976,10 +7976,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vld1q_s64 (const int64_t * a) { int64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7987,10 +7987,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vld1q_u8 (const uint8_t * a) { uint8x16_t result; - __asm__ ("ld1 {%0.16b},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.16b}, %1" + : "=w"(result) + : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -7998,10 +7998,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vld1q_u16 (const uint16_t * a) { uint16x8_t result; - __asm__ ("ld1 {%0.8h},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.8h}, %1" + : "=w"(result) + : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8009,10 +8009,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vld1q_u32 (const uint32_t * a) { uint32x4_t result; - __asm__ ("ld1 {%0.4s},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.4s}, %1" + : "=w"(result) + : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;})) + : /* No clobbers */); return result; } @@ -8020,10 +8020,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vld1q_u64 (const uint64_t * a) { uint64x2_t result; - __asm__ ("ld1 {%0.2d},[%1]" - : "=w"(result) - : "r"(a) - : "memory"); + __asm__ ("ld1 {%0.2d}, %1" + : "=w"(result) + : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;})) + : /* No clobbers */); return result; } diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 0bb1650dc74..2399423ce9a 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -10744,6 +10744,13 @@ load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, || (i != nops - 1 && unsorted_regs[i] == base_reg)) return 0; + /* Don't allow SP to be loaded unless it is also the base + register. It guarantees that SP is reset correctly when + an LDM instruction is interrupted. Otherwise, we might + end up with a corrupt stack. */ + if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM) + return 0; + unsorted_offsets[i] = INTVAL (offset); if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) order[0] = i; @@ -18567,7 +18574,7 @@ arm_print_operand (FILE *stream, rtx x, int code) "wC12", "wC13", "wC14", "wC15" }; - fprintf (stream, wc_reg_names [INTVAL (x)]); + fputs (wc_reg_names [INTVAL (x)], stream); } return; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 252f18b40a8..b3ad42b376f 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -314,6 +314,12 @@ (V2SF "V2SI") (V4SF "V4SI") (DI "DI") (V2DI "V2DI")]) +(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") + (V4HI "v4hi") (V8HI "v8hi") + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V2SF "v2si") (V4SF "v4si")]) + ;; Get element type from double-width mode, for operations where we ;; don't care about signedness. (define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 2172a13c683..f91a6f7d08b 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1698,80 +1698,144 @@ (define_expand "vcond<mode><mode>" [(set (match_operand:VDQW 0 "s_register_operand" "") (if_then_else:VDQW - (match_operator 3 "arm_comparison_operator" + (match_operator 3 "comparison_operator" [(match_operand:VDQW 4 "s_register_operand" "") (match_operand:VDQW 5 "nonmemory_operand" "")]) (match_operand:VDQW 1 "s_register_operand" "") (match_operand:VDQW 2 "s_register_operand" "")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { - rtx mask; - int inverse = 0, immediate_zero = 0; - /* See the description of "magic" bits in the 'T' case of - arm_print_operand. */ HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode) ? 3 : 1; rtx magic_rtx = GEN_INT (magic_word); - - mask = gen_reg_rtx (<V_cmp_result>mode); - - if (operands[5] == CONST0_RTX (<MODE>mode)) - immediate_zero = 1; - else if (!REG_P (operands[5])) - operands[5] = force_reg (<MODE>mode, operands[5]); - + int inverse = 0; + int swap_bsl_operands = 0; + rtx mask = gen_reg_rtx (<V_cmp_result>mode); + rtx tmp = gen_reg_rtx (<V_cmp_result>mode); + + rtx (*base_comparison) (rtx, rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx); + switch (GET_CODE (operands[3])) { case GE: - emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5], - magic_rtx)); + case LE: + case EQ: + if (!REG_P (operands[5]) + && (operands[5] != CONST0_RTX (<MODE>mode))) + operands[5] = force_reg (<MODE>mode, operands[5]); break; - + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (<MODE>mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_neon_vcge<mode>; + complimentary_comparison = gen_neon_vcgt<mode>; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ case GT: - emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], - magic_rtx)); + case UNGT: + base_comparison = gen_neon_vcgt<mode>; + complimentary_comparison = gen_neon_vcge<mode>; break; - case EQ: - emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5], - magic_rtx)); + case NE: + case UNEQ: + base_comparison = gen_neon_vceq<mode>; + complimentary_comparison = gen_neon_vceq<mode>; break; - + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: case LE: - if (immediate_zero) - emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5], - magic_rtx)); + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of vcge, vcgt or vceq. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b */ + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); else - emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], - magic_rtx)); + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); break; - - case LT: - if (immediate_zero) - emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5], - magic_rtx)); + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* Vector compare returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. + + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); else - emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4], - magic_rtx)); + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); + + swap_bsl_operands = 1; break; - - case NE: - emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5], - magic_rtx)); - inverse = 1; + case UNEQ: + /* We check (a > b || b > a). combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. */ + + emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); break; - default: gcc_unreachable (); } - - if (inverse) + + if (swap_bsl_operands) emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], operands[1])); else emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], operands[2])); - DONE; }) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b835c5da2ab..abf2bbad30f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -20434,7 +20434,7 @@ ix86_expand_vec_perm (rtx operands[]) vec[i * 2 + 1] = const1_rtx; } vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); - vt = force_const_mem (maskmode, vt); + vt = validize_mem (force_const_mem (maskmode, vt)); t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, OPTAB_DIRECT); @@ -20631,7 +20631,7 @@ ix86_expand_vec_perm (rtx operands[]) for (i = 0; i < 16; ++i) vec[i] = GEN_INT (i/e * e); vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); - vt = force_const_mem (V16QImode, vt); + vt = validize_mem (force_const_mem (V16QImode, vt)); if (TARGET_XOP) emit_insn (gen_xop_pperm (mask, mask, mask, vt)); else @@ -20642,7 +20642,7 @@ ix86_expand_vec_perm (rtx operands[]) for (i = 0; i < 16; ++i) vec[i] = GEN_INT (i % e); vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); - vt = force_const_mem (V16QImode, vt); + vt = validize_mem (force_const_mem (V16QImode, vt)); emit_insn (gen_addv16qi3 (mask, mask, vt)); } @@ -33862,6 +33862,11 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) return true; if (!TARGET_PARTIAL_REG_STALL) return true; + /* LRA checks if the hard register is OK for the given mode. + QImode values can live in non-QI regs, so we allow all + registers here. */ + if (lra_in_progress) + return true; return !can_create_pseudo_p (); } /* We handle both integer and floats in the general purpose registers. */ @@ -34170,6 +34175,13 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, { if (CONST_INT_P (XEXP (x, 1))) *total = cost->shift_const; + else if (GET_CODE (XEXP (x, 1)) == SUBREG + && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND) + { + /* Return the cost after shift-and truncation. */ + *total = cost->shift_var; + return true; + } else *total = cost->shift_var; } @@ -40716,7 +40728,9 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, the even slots. For some cpus this is faster than a PSHUFD. */ if (odd_p) { - if (TARGET_XOP && mode == V4SImode) + /* For XOP use vpmacsdqh, but only for smult, as it is only + signed. */ + if (TARGET_XOP && mode == V4SImode && !uns_p) { x = force_reg (wmode, CONST0_RTX (wmode)); emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 24d1012d5ce..63d65761b92 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3281,7 +3281,7 @@ (cond [(eq_attr "alternative" "0,1,2") (const_string "fmov") (eq_attr "alternative" "3,4") - (const_string "multi") + (const_string "imov") (eq_attr "alternative" "5") (const_string "sselog1") (eq_attr "alternative" "9,10,11,14,15") @@ -3432,10 +3432,10 @@ (define_insn "*zero_extendsidi2_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r ,o,?*Ym,?*y,?*Yi,?*x") + "=r ,o,?*Ym,?!*y,?*Yi,?*x") (zero_extend:DI (match_operand:SI 1 "x86_64_zext_general_operand" - "rmWz,0,r ,m ,r ,m")))] + "rmWz,0,r ,m ,r ,m")))] "TARGET_64BIT" { switch (get_attr_type (insn)) @@ -3466,9 +3466,9 @@ (define_insn "*zero_extendsidi2" [(set (match_operand:DI 0 "nonimmediate_operand" - "=ro,?r,?o,?*Ym,?*y,?*Yi,?*x") + "=ro,?r,?o,?*Ym,?!*y,?*Yi,?*x") (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" - "0 ,rm,r ,r ,m ,r ,m")))] + "0 ,rm,r ,r ,m ,r ,m")))] "!TARGET_64BIT" "@ # @@ -9136,28 +9136,20 @@ }) ;; Avoid useless masking of count operand. -(define_insn_and_split "*ashl<mode>3_mask" +(define_insn "*ashl<mode>3_mask" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0") (subreg:QI (and:SI - (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand" "n")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) == GET_MODE_BITSIZE (<MODE>mode)-1" - "#" - "&& 1" - [(parallel [(set (match_dup 0) - (ashift:SWI48 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] { - if (can_create_pseudo_p ()) - operands [2] = force_reg (SImode, operands[2]); - - operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); + return "sal{<imodesuffix>}\t{%b2, %0|%0, %b2}"; } [(set_attr "type" "ishift") (set_attr "mode" "<MODE>")]) @@ -9646,28 +9638,20 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") ;; Avoid useless masking of count operand. -(define_insn_and_split "*<shift_insn><mode>3_mask" +(define_insn "*<shift_insn><mode>3_mask" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0") (subreg:QI (and:SI - (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand" "n")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) == GET_MODE_BITSIZE (<MODE>mode)-1" - "#" - "&& 1" - [(parallel [(set (match_dup 0) - (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] { - if (can_create_pseudo_p ()) - operands [2] = force_reg (SImode, operands[2]); - - operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); + return "<shift>{<imodesuffix>}\t{%b2, %0|%0, %b2}"; } [(set_attr "type" "ishift") (set_attr "mode" "<MODE>")]) @@ -10109,28 +10093,20 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") ;; Avoid useless masking of count operand. -(define_insn_and_split "*<rotate_insn><mode>3_mask" +(define_insn "*<rotate_insn><mode>3_mask" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") (any_rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0") (subreg:QI (and:SI - (match_operand:SI 2 "nonimmediate_operand" "c") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand" "n")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) == GET_MODE_BITSIZE (<MODE>mode)-1" - "#" - "&& 1" - [(parallel [(set (match_dup 0) - (any_rotate:SWI48 (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] { - if (can_create_pseudo_p ()) - operands [2] = force_reg (SImode, operands[2]); - - operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0); + return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}"; } [(set_attr "type" "rotate") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e2bb68ba18f..585dc71038c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5638,7 +5638,7 @@ if (TARGET_SSE4_1) { if (CONSTANT_P (operands[2])) - operands[2] = force_const_mem (<MODE>mode, operands[2]); + operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[2])); ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); } else @@ -9942,7 +9942,8 @@ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] "TARGET_XOP" { - operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); + operands[3] + = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2])); return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\"; } [(set_attr "type" "sseishft") diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 0d39483d908..dc6f2e42bcf 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -792,7 +792,9 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) /* Extract CODE_LABEL. */ orig = XEXP (orig, 0); add_reg_note (insn, REG_LABEL_OPERAND, orig); - LABEL_NUSES (orig)++; + /* Make sure we have label and not a note. */ + if (LABEL_P (orig)) + LABEL_NUSES (orig)++; } crtl->uses_pic_offset_table = 1; return reg; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dcb1f64d4ee..3fafd9b278c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -208,8 +208,8 @@ ;; VSX moves (define_insn "*vsx_mov<mode>" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v") + (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (<MODE>mode) && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -239,23 +239,24 @@ case 6: case 7: case 8: + case 11: return "#"; case 9: case 10: return "xxlxor %x0,%x0,%x0"; - case 11: + case 12: return output_vec_const_move (operands); - case 12: + case 13: gcc_assert (MEM_P (operands[0]) && GET_CODE (XEXP (operands[0], 0)) != PRE_INC && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); return "stvx %1,%y0"; - case 13: + case 14: gcc_assert (MEM_P (operands[0]) && GET_CODE (XEXP (operands[0], 0)) != PRE_INC && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC @@ -266,7 +267,7 @@ gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,vecstore,vecload")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) ;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for ;; unions. However for plain data movement, slightly favor the vector loads diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 7e87dcd547d..ac33371937e 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -4647,6 +4647,9 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) int smode_bsize, mode_bsize; rtx op, clobber; + if (bitsize + bitpos > GET_MODE_SIZE (mode)) + return false; + /* Generate INSERT IMMEDIATE (IILL et al). */ /* (set (ze (reg)) (const_int)). */ if (TARGET_ZARCH diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 42ef5e142d8..8fb3f8a8b0d 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -689,7 +689,7 @@ ;; Extract contiguous bits and compare them against zero. (define_insn "tstsi_t_zero_extract_eq" [(set (reg:SI T_REG) - (eq:SI (zero_extract:SI (match_operand 0 "logical_operand" "z") + (eq:SI (zero_extract:SI (match_operand:SI 0 "logical_operand" "z") (match_operand:SI 1 "const_int_operand") (match_operand:SI 2 "const_int_operand")) (const_int 0)))] diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 08c2894fbbc..fb01ae92a2a 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -4321,13 +4321,14 @@ mem_min_alignment (rtx mem, int desired) mapped into one sparc_mode_class mode. */ enum sparc_mode_class { - S_MODE, D_MODE, T_MODE, O_MODE, + H_MODE, S_MODE, D_MODE, T_MODE, O_MODE, SF_MODE, DF_MODE, TF_MODE, OF_MODE, CC_MODE, CCFP_MODE }; /* Modes for single-word and smaller quantities. */ -#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) +#define S_MODES \ + ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE)) /* Modes for double-word and smaller quantities. */ #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) @@ -4338,13 +4339,11 @@ enum sparc_mode_class { /* Modes for 8-word and smaller quantities. */ #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) -/* Modes for single-float quantities. We must allow any single word or - smaller quantity. This is because the fix/float conversion instructions - take integer inputs/outputs from the float registers. */ -#define SF_MODES (S_MODES) +/* Modes for single-float quantities. */ +#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) /* Modes for double-float and smaller quantities. */ -#define DF_MODES (D_MODES) +#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) /* Modes for quad-float and smaller quantities. */ #define TF_MODES (DF_MODES | (1 << (int) TF_MODE)) @@ -4440,7 +4439,9 @@ sparc_init_modes (void) case MODE_INT: case MODE_PARTIAL_INT: case MODE_COMPLEX_INT: - if (GET_MODE_SIZE (i) <= 4) + if (GET_MODE_SIZE (i) < 4) + sparc_mode_class[i] = 1 << (int) H_MODE; + else if (GET_MODE_SIZE (i) == 4) sparc_mode_class[i] = 1 << (int) S_MODE; else if (GET_MODE_SIZE (i) == 8) sparc_mode_class[i] = 1 << (int) D_MODE; @@ -4452,14 +4453,16 @@ sparc_init_modes (void) sparc_mode_class[i] = 0; break; case MODE_VECTOR_INT: - if (GET_MODE_SIZE (i) <= 4) - sparc_mode_class[i] = 1 << (int)SF_MODE; + if (GET_MODE_SIZE (i) == 4) + sparc_mode_class[i] = 1 << (int) SF_MODE; else if (GET_MODE_SIZE (i) == 8) - sparc_mode_class[i] = 1 << (int)DF_MODE; + sparc_mode_class[i] = 1 << (int) DF_MODE; + else + sparc_mode_class[i] = 0; break; case MODE_FLOAT: case MODE_COMPLEX_FLOAT: - if (GET_MODE_SIZE (i) <= 4) + if (GET_MODE_SIZE (i) == 4) sparc_mode_class[i] = 1 << (int) SF_MODE; else if (GET_MODE_SIZE (i) == 8) sparc_mode_class[i] = 1 << (int) DF_MODE; diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 6b02b45a24c..c6122c115cd 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -327,6 +327,8 @@ extern enum cmodel sparc_cmodel; %{mcpu=sparclite86x:-Asparclite} \ %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \ %{mcpu=v8:-Av8} \ +%{mcpu=supersparc:-Av8} \ +%{mcpu=hypersparc:-Av8} \ %{mcpu=leon:-Av8} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ |