author     Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>  2013-05-14 09:55:27 +0000
committer  Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>  2013-05-14 09:55:27 +0000
commit     b5ae17f88f50279e4573f2959d42922c0c30b1db (patch)
tree       5569c9b0222f5ff5603ae258582f94936128c806 /gcc/config
parent     9c29cfa31a86897535882b5042585a7eb4bf3e2b (diff)
parent     f9bf04177d912f484abfb9c3985d5e3b5c8e0bf0 (diff)
Merge from branches/gcc-4_8-branch r198615.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@198871 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md    34
-rw-r--r--  gcc/config/aarch64/aarch64.c          47
-rw-r--r--  gcc/config/aarch64/aarch64.md          4
-rw-r--r--  gcc/config/aarch64/arm_neon.h        480
-rw-r--r--  gcc/config/arm/arm.c                   9
-rw-r--r--  gcc/config/arm/iterators.md            6
-rw-r--r--  gcc/config/arm/neon.md               150
-rw-r--r--  gcc/config/i386/i386.c                22
-rw-r--r--  gcc/config/i386/i386.md               52
-rw-r--r--  gcc/config/i386/sse.md                 5
-rw-r--r--  gcc/config/pa/pa.c                     4
-rw-r--r--  gcc/config/rs6000/vsx.md              13
-rw-r--r--  gcc/config/s390/s390.c                 3
-rw-r--r--  gcc/config/sh/sh.md                    2
-rw-r--r--  gcc/config/sparc/sparc.c              27
-rw-r--r--  gcc/config/sparc/sparc.h               2
16 files changed, 499 insertions, 361 deletions
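The bulk of the arm_neon.h churn below replaces the old inline-asm idiom of passing the pointer in a general register ("r" constraint) plus a blanket "memory" clobber with a genuine memory operand using the AArch64 "Utv" constraint, so the compiler sees exactly which object each ld1/ld1r reads and no clobber list is needed. A minimal sketch of the resulting pattern is given here for orientation; the wrapper name my_vld1_u8_sketch is illustrative and not part of the patch, and the snippet only builds with an AArch64 GCC that understands the "Utv" constraint.

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative sketch of the inline-asm shape the arm_neon.h hunks
   converge on: the dereferenced object is handed to the asm as a
   memory operand, instead of a pointer in a general register with a
   "memory" clobber.  */
static inline uint8x8_t
my_vld1_u8_sketch (const uint8_t *a)   /* hypothetical name */
{
  uint8x8_t result;
  __asm__ ("ld1 {%0.8b}, %1"
	   : "=w" (result)                   /* SIMD register output */
	   : "Utv" (*(const uint8x8_t *) a)  /* the 8 bytes actually loaded */
	   : /* no clobbers */);
  return result;
}

Because the load is expressed as an operand rather than a clobber of all memory, surrounding loads and stores can still be scheduled and optimized across the asm.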
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f85446b988e..37416901d9c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1794,6 +1794,7 @@
"TARGET_SIMD"
{
int inverse = 0;
+ int use_zero_form = 0;
int swap_bsl_operands = 0;
rtx op1 = operands[1];
rtx op2 = operands[2];
@@ -1806,12 +1807,16 @@
switch (GET_CODE (operands[3]))
{
case GE:
+ case GT:
case LE:
+ case LT:
case EQ:
- if (!REG_P (operands[5])
- && (operands[5] != CONST0_RTX (<MODE>mode)))
- operands[5] = force_reg (<MODE>mode, operands[5]);
- break;
+ if (operands[5] == CONST0_RTX (<MODE>mode))
+ {
+ use_zero_form = 1;
+ break;
+ }
+ /* Fall through. */
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
@@ -1862,7 +1867,26 @@
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
- a EQ b -> a EQ b */
+ a EQ b -> a EQ b
+ Note that there also exist direct comparison against 0 forms,
+ so catch those as a special case. */
+ if (use_zero_form)
+ {
+ inverse = 0;
+ switch (GET_CODE (operands[3]))
+ {
+ case LT:
+ base_comparison = gen_aarch64_cmlt<mode>;
+ break;
+ case LE:
+ base_comparison = gen_aarch64_cmle<mode>;
+ break;
+ default:
+ /* Do nothing, other zero form cases already have the correct
+ base_comparison. */
+ break;
+ }
+ }
if (!inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1615a16b53a..9164911673c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3366,6 +3366,16 @@ aarch64_print_operand (FILE *f, rtx x, char code)
asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
break;
+ case 'X':
+ /* Print integer constant in hex. */
+ if (GET_CODE (x) != CONST_INT)
+ {
+ output_operand_lossage ("invalid operand for '%%%c'", code);
+ return;
+ }
+ asm_fprintf (f, "0x%x", UINTVAL (x));
+ break;
+
case 'w':
case 'x':
/* Print a general register name or the zero register (32-bit or
@@ -3870,14 +3880,21 @@ aarch64_can_eliminate (const int from, const int to)
}
else
{
- /* If we decided that we didn't need a frame pointer but then used
- LR in the function, then we do need a frame pointer after all, so
- prevent this elimination to ensure a frame pointer is used. */
+ /* If we decided that we didn't need a leaf frame pointer but then used
+ LR in the function, then we'll want a frame pointer after all, so
+ prevent this elimination to ensure a frame pointer is used.
+ NOTE: the original value of flag_omit_frame_pointer gets trashed
+ IFF flag_omit_leaf_frame_pointer is true, so we check the value
+ of faked_omit_frame_pointer here (which is true when we always
+ wish to keep non-leaf frame pointers but only wish to keep leaf frame
+ pointers when LR is clobbered). */
if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
- && df_regs_ever_live_p (LR_REGNUM))
+ && df_regs_ever_live_p (LR_REGNUM)
+ && faked_omit_frame_pointer)
return false;
}
+
return true;
}
@@ -7064,12 +7081,30 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
}
static void
+aarch64_print_extension (void)
+{
+ const struct aarch64_option_extension *opt = NULL;
+
+ for (opt = all_extensions; opt->name != NULL; opt++)
+ if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
+ asm_fprintf (asm_out_file, "+%s", opt->name);
+
+ asm_fprintf (asm_out_file, "\n");
+}
+
+static void
aarch64_start_file (void)
{
if (selected_arch)
- asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
+ {
+ asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
+ aarch64_print_extension ();
+ }
else if (selected_cpu)
- asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
+ {
+ asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
+ aarch64_print_extension ();
+ }
default_file_start();
}
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b4e03a7d812..222dc69cb2f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -860,8 +860,8 @@
(match_operand:GPI 2 "const_int_operand" "n"))]
"INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
&& INTVAL (operands[1]) % 16 == 0
- && INTVAL (operands[2]) <= 0xffff"
- "movk\\t%<w>0, %2, lsl %1"
+ && UINTVAL (operands[2]) <= 0xffff"
+ "movk\\t%<w>0, %X2, lsl %1"
[(set_attr "v8type" "movk")
(set_attr "mode" "<MODE>")]
)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 29e4d64e052..5f1ab115c52 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -7191,10 +7191,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * a)
{
float32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7202,10 +7202,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t * a)
{
float64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7213,10 +7213,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t * a)
{
poly8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7224,10 +7224,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t * a)
{
poly16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7235,10 +7235,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t * a)
{
int8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7246,10 +7246,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t * a)
{
int16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7257,10 +7257,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t * a)
{
int32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7268,10 +7268,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t * a)
{
int64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7279,10 +7279,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t * a)
{
uint8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7290,10 +7290,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t * a)
{
uint16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7301,10 +7301,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t * a)
{
uint32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7312,10 +7312,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t * a)
{
uint64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7323,10 +7323,10 @@ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_f32 (const float32_t * a)
{
float32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7334,10 +7334,10 @@ __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_f64 (const float64_t * a)
{
float64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7347,9 +7347,9 @@ vld1_f64 (const float64_t * a)
float32x2_t b_ = (b); \
const float32_t * a_ = (a); \
float32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7360,9 +7360,9 @@ vld1_f64 (const float64_t * a)
float64x1_t b_ = (b); \
const float64_t * a_ = (a); \
float64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7373,9 +7373,9 @@ vld1_f64 (const float64_t * a)
poly8x8_t b_ = (b); \
const poly8_t * a_ = (a); \
poly8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7386,9 +7386,9 @@ vld1_f64 (const float64_t * a)
poly16x4_t b_ = (b); \
const poly16_t * a_ = (a); \
poly16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7399,9 +7399,9 @@ vld1_f64 (const float64_t * a)
int8x8_t b_ = (b); \
const int8_t * a_ = (a); \
int8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7412,9 +7412,9 @@ vld1_f64 (const float64_t * a)
int16x4_t b_ = (b); \
const int16_t * a_ = (a); \
int16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7425,9 +7425,9 @@ vld1_f64 (const float64_t * a)
int32x2_t b_ = (b); \
const int32_t * a_ = (a); \
int32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7438,9 +7438,9 @@ vld1_f64 (const float64_t * a)
int64x1_t b_ = (b); \
const int64_t * a_ = (a); \
int64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7451,9 +7451,9 @@ vld1_f64 (const float64_t * a)
uint8x8_t b_ = (b); \
const uint8_t * a_ = (a); \
uint8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7464,9 +7464,9 @@ vld1_f64 (const float64_t * a)
uint16x4_t b_ = (b); \
const uint16_t * a_ = (a); \
uint16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7477,9 +7477,9 @@ vld1_f64 (const float64_t * a)
uint32x2_t b_ = (b); \
const uint32_t * a_ = (a); \
uint32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7490,9 +7490,9 @@ vld1_f64 (const float64_t * a)
uint64x1_t b_ = (b); \
const uint64_t * a_ = (a); \
uint64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7501,10 +7501,10 @@ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_p8 (const poly8_t * a)
{
poly8x8_t result;
- __asm__ ("ld1 {%0.8b}, [%1]"
- : "=w"(result)
- : "r"(a)
- : /* No clobbers */);
+ __asm__ ("ld1 {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7512,10 +7512,10 @@ __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_p16 (const poly16_t * a)
{
poly16x4_t result;
- __asm__ ("ld1 {%0.4h}, [%1]"
- : "=w"(result)
- : "r"(a)
- : /* No clobbers */);
+ __asm__ ("ld1 {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7523,10 +7523,10 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_s8 (const int8_t * a)
{
int8x8_t result;
- __asm__ ("ld1 {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7534,10 +7534,10 @@ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_s16 (const int16_t * a)
{
int16x4_t result;
- __asm__ ("ld1 {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7545,10 +7545,10 @@ __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_s32 (const int32_t * a)
{
int32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7556,10 +7556,10 @@ __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_s64 (const int64_t * a)
{
int64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7567,10 +7567,10 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_u8 (const uint8_t * a)
{
uint8x8_t result;
- __asm__ ("ld1 {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8b}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7578,10 +7578,10 @@ __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_u16 (const uint16_t * a)
{
uint16x4_t result;
- __asm__ ("ld1 {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4h}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7589,10 +7589,10 @@ __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_u32 (const uint32_t * a)
{
uint32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7600,10 +7600,10 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_u64 (const uint64_t * a)
{
uint64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7611,10 +7611,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
float32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7622,10 +7622,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a)
{
float64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7633,10 +7633,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a)
{
poly8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7644,10 +7644,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a)
{
poly16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7655,10 +7655,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a)
{
int8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7666,10 +7666,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a)
{
int16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7677,10 +7677,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a)
{
int32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7688,10 +7688,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a)
{
int64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7699,10 +7699,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a)
{
uint8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7710,10 +7710,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a)
{
uint16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7721,10 +7721,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a)
{
uint32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7732,10 +7732,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a)
{
uint64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers */);
return result;
}
@@ -7743,10 +7743,10 @@ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_f32 (const float32_t * a)
{
float32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7754,10 +7754,10 @@ __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_f64 (const float64_t * a)
{
float64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7767,9 +7767,9 @@ vld1q_f64 (const float64_t * a)
float32x4_t b_ = (b); \
const float32_t * a_ = (a); \
float32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7780,9 +7780,9 @@ vld1q_f64 (const float64_t * a)
float64x2_t b_ = (b); \
const float64_t * a_ = (a); \
float64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7793,9 +7793,9 @@ vld1q_f64 (const float64_t * a)
poly8x16_t b_ = (b); \
const poly8_t * a_ = (a); \
poly8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7806,9 +7806,9 @@ vld1q_f64 (const float64_t * a)
poly16x8_t b_ = (b); \
const poly16_t * a_ = (a); \
poly16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7819,9 +7819,9 @@ vld1q_f64 (const float64_t * a)
int8x16_t b_ = (b); \
const int8_t * a_ = (a); \
int8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7832,9 +7832,9 @@ vld1q_f64 (const float64_t * a)
int16x8_t b_ = (b); \
const int16_t * a_ = (a); \
int16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7845,9 +7845,9 @@ vld1q_f64 (const float64_t * a)
int32x4_t b_ = (b); \
const int32_t * a_ = (a); \
int32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7858,9 +7858,9 @@ vld1q_f64 (const float64_t * a)
int64x2_t b_ = (b); \
const int64_t * a_ = (a); \
int64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7871,9 +7871,9 @@ vld1q_f64 (const float64_t * a)
uint8x16_t b_ = (b); \
const uint8_t * a_ = (a); \
uint8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7884,9 +7884,9 @@ vld1q_f64 (const float64_t * a)
uint16x8_t b_ = (b); \
const uint16_t * a_ = (a); \
uint16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7897,9 +7897,9 @@ vld1q_f64 (const float64_t * a)
uint32x4_t b_ = (b); \
const uint32_t * a_ = (a); \
uint32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7910,9 +7910,9 @@ vld1q_f64 (const float64_t * a)
uint64x2_t b_ = (b); \
const uint64_t * a_ = (a); \
uint64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) \
: /* No clobbers */); \
result; \
})
@@ -7921,10 +7921,10 @@ __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_p8 (const poly8_t * a)
{
poly8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7932,10 +7932,10 @@ __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_p16 (const poly16_t * a)
{
poly16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7943,10 +7943,10 @@ __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_s8 (const int8_t * a)
{
int8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7954,10 +7954,10 @@ __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_s16 (const int16_t * a)
{
int16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8h}, %1"
+ : "=w"(result)
+ : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7965,10 +7965,10 @@ __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_s32 (const int32_t * a)
{
int32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7976,10 +7976,10 @@ __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_s64 (const int64_t * a)
{
int64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7987,10 +7987,10 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_u8 (const uint8_t * a)
{
uint8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -7998,10 +7998,10 @@ __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_u16 (const uint16_t * a)
{
uint16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8h}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -8009,10 +8009,10 @@ __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_u32 (const uint32_t * a)
{
uint32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
@@ -8020,10 +8020,10 @@ __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_u64 (const uint64_t * a)
{
uint64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1"
+ : "=w"(result)
+ : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
+ : /* No clobbers */);
return result;
}
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0bb1650dc74..2399423ce9a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -10744,6 +10744,13 @@ load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
|| (i != nops - 1 && unsorted_regs[i] == base_reg))
return 0;
+ /* Don't allow SP to be loaded unless it is also the base
+ register. It guarantees that SP is reset correctly when
+ an LDM instruction is interrupted. Otherwise, we might
+ end up with a corrupt stack. */
+ if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
+ return 0;
+
unsorted_offsets[i] = INTVAL (offset);
if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
order[0] = i;
@@ -18567,7 +18574,7 @@ arm_print_operand (FILE *stream, rtx x, int code)
"wC12", "wC13", "wC14", "wC15"
};
- fprintf (stream, wc_reg_names [INTVAL (x)]);
+ fputs (wc_reg_names [INTVAL (x)], stream);
}
return;
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 252f18b40a8..b3ad42b376f 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -314,6 +314,12 @@
(V2SF "V2SI") (V4SF "V4SI")
(DI "DI") (V2DI "V2DI")])
+(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
+ (V4HI "v4hi") (V8HI "v8hi")
+ (V2SI "v2si") (V4SI "v4si")
+ (DI "di") (V2DI "v2di")
+ (V2SF "v2si") (V4SF "v4si")])
+
;; Get element type from double-width mode, for operations where we
;; don't care about signedness.
(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 2172a13c683..f91a6f7d08b 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1698,80 +1698,144 @@
(define_expand "vcond<mode><mode>"
[(set (match_operand:VDQW 0 "s_register_operand" "")
(if_then_else:VDQW
- (match_operator 3 "arm_comparison_operator"
+ (match_operator 3 "comparison_operator"
[(match_operand:VDQW 4 "s_register_operand" "")
(match_operand:VDQW 5 "nonmemory_operand" "")])
(match_operand:VDQW 1 "s_register_operand" "")
(match_operand:VDQW 2 "s_register_operand" "")))]
"TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
- rtx mask;
- int inverse = 0, immediate_zero = 0;
- /* See the description of "magic" bits in the 'T' case of
- arm_print_operand. */
HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
? 3 : 1;
rtx magic_rtx = GEN_INT (magic_word);
-
- mask = gen_reg_rtx (<V_cmp_result>mode);
-
- if (operands[5] == CONST0_RTX (<MODE>mode))
- immediate_zero = 1;
- else if (!REG_P (operands[5]))
- operands[5] = force_reg (<MODE>mode, operands[5]);
-
+ int inverse = 0;
+ int swap_bsl_operands = 0;
+ rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+ rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
+
+ rtx (*base_comparison) (rtx, rtx, rtx, rtx);
+ rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx);
+
switch (GET_CODE (operands[3]))
{
case GE:
- emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
- magic_rtx));
+ case LE:
+ case EQ:
+ if (!REG_P (operands[5])
+ && (operands[5] != CONST0_RTX (<MODE>mode)))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
break;
-
+ default:
+ if (!REG_P (operands[5]))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
+ }
+
+ switch (GET_CODE (operands[3]))
+ {
+ case LT:
+ case UNLT:
+ inverse = 1;
+ /* Fall through. */
+ case GE:
+ case UNGE:
+ case ORDERED:
+ case UNORDERED:
+ base_comparison = gen_neon_vcge<mode>;
+ complimentary_comparison = gen_neon_vcgt<mode>;
+ break;
+ case LE:
+ case UNLE:
+ inverse = 1;
+ /* Fall through. */
case GT:
- emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
- magic_rtx));
+ case UNGT:
+ base_comparison = gen_neon_vcgt<mode>;
+ complimentary_comparison = gen_neon_vcge<mode>;
break;
-
case EQ:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
- magic_rtx));
+ case NE:
+ case UNEQ:
+ base_comparison = gen_neon_vceq<mode>;
+ complimentary_comparison = gen_neon_vceq<mode>;
break;
-
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (operands[3]))
+ {
+ case LT:
case LE:
- if (immediate_zero)
- emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
- magic_rtx));
+ case GT:
+ case GE:
+ case EQ:
+ /* The easy case. Here we emit one of vcge, vcgt or vceq.
+ As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
+ a GE b -> a GE b
+ a GT b -> a GT b
+ a LE b -> b GE a
+ a LT b -> b GT a
+ a EQ b -> a EQ b */
+
+ if (!inverse)
+ emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
else
- emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
- magic_rtx));
+ emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
break;
-
- case LT:
- if (immediate_zero)
- emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
- magic_rtx));
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case NE:
+ /* Vector compare returns false for lanes which are unordered, so if we use
+ the inverse of the comparison we actually want to emit, then
+ swap the operands to BSL, we will end up with the correct result.
+ Note that a NE NaN and NaN NE b are true for all a, b.
+
+ Our transformations are:
+ a GE b -> !(b GT a)
+ a GT b -> !(b GE a)
+ a LE b -> !(a GT b)
+ a LT b -> !(a GE b)
+ a NE b -> !(a EQ b) */
+
+ if (inverse)
+ emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx));
else
- emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
- magic_rtx));
+ emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx));
+
+ swap_bsl_operands = 1;
break;
-
- case NE:
- emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
- magic_rtx));
- inverse = 1;
+ case UNEQ:
+ /* We check (a > b || b > a). combining these comparisons give us
+ true iff !(a != b && a ORDERED b), swapping the operands to BSL
+ will then give us (a == b || a UNORDERED b) as intended. */
+
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5], magic_rtx));
+ emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4], magic_rtx));
+ emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
+ swap_bsl_operands = 1;
+ break;
+ case UNORDERED:
+ /* Operands are ORDERED iff (a > b || b >= a).
+ Swapping the operands to BSL will give the UNORDERED case. */
+ swap_bsl_operands = 1;
+ /* Fall through. */
+ case ORDERED:
+ emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5], magic_rtx));
+ emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4], magic_rtx));
+ emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
break;
-
default:
gcc_unreachable ();
}
-
- if (inverse)
+
+ if (swap_bsl_operands)
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
operands[1]));
else
emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
operands[2]));
-
DONE;
})
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b835c5da2ab..abf2bbad30f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20434,7 +20434,7 @@ ix86_expand_vec_perm (rtx operands[])
vec[i * 2 + 1] = const1_rtx;
}
vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = force_const_mem (maskmode, vt);
+ vt = validize_mem (force_const_mem (maskmode, vt));
t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
OPTAB_DIRECT);
@@ -20631,7 +20631,7 @@ ix86_expand_vec_perm (rtx operands[])
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i/e * e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
if (TARGET_XOP)
emit_insn (gen_xop_pperm (mask, mask, mask, vt));
else
@@ -20642,7 +20642,7 @@ ix86_expand_vec_perm (rtx operands[])
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i % e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
emit_insn (gen_addv16qi3 (mask, mask, vt));
}
@@ -33862,6 +33862,11 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return true;
if (!TARGET_PARTIAL_REG_STALL)
return true;
+ /* LRA checks if the hard register is OK for the given mode.
+ QImode values can live in non-QI regs, so we allow all
+ registers here. */
+ if (lra_in_progress)
+ return true;
return !can_create_pseudo_p ();
}
/* We handle both integer and floats in the general purpose registers. */
@@ -34170,6 +34175,13 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
{
if (CONST_INT_P (XEXP (x, 1)))
*total = cost->shift_const;
+ else if (GET_CODE (XEXP (x, 1)) == SUBREG
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
+ {
+ /* Return the cost after shift-and truncation. */
+ *total = cost->shift_var;
+ return true;
+ }
else
*total = cost->shift_var;
}
@@ -40716,7 +40728,9 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
the even slots. For some cpus this is faster than a PSHUFD. */
if (odd_p)
{
- if (TARGET_XOP && mode == V4SImode)
+ /* For XOP use vpmacsdqh, but only for smult, as it is only
+ signed. */
+ if (TARGET_XOP && mode == V4SImode && !uns_p)
{
x = force_reg (wmode, CONST0_RTX (wmode));
emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 24d1012d5ce..63d65761b92 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3281,7 +3281,7 @@
(cond [(eq_attr "alternative" "0,1,2")
(const_string "fmov")
(eq_attr "alternative" "3,4")
- (const_string "multi")
+ (const_string "imov")
(eq_attr "alternative" "5")
(const_string "sselog1")
(eq_attr "alternative" "9,10,11,14,15")
@@ -3432,10 +3432,10 @@
(define_insn "*zero_extendsidi2_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o,?*Ym,?*y,?*Yi,?*x")
+ "=r ,o,?*Ym,?!*y,?*Yi,?*x")
(zero_extend:DI
(match_operand:SI 1 "x86_64_zext_general_operand"
- "rmWz,0,r ,m ,r ,m")))]
+ "rmWz,0,r ,m ,r ,m")))]
"TARGET_64BIT"
{
switch (get_attr_type (insn))
@@ -3466,9 +3466,9 @@
(define_insn "*zero_extendsidi2"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=ro,?r,?o,?*Ym,?*y,?*Yi,?*x")
+ "=ro,?r,?o,?*Ym,?!*y,?*Yi,?*x")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand"
- "0 ,rm,r ,r ,m ,r ,m")))]
+ "0 ,rm,r ,r ,m ,r ,m")))]
"!TARGET_64BIT"
"@
#
@@ -9136,28 +9136,20 @@
})
;; Avoid useless masking of count operand.
-(define_insn_and_split "*ashl<mode>3_mask"
+(define_insn "*ashl<mode>3_mask"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
(ashift:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0")
(subreg:QI
(and:SI
- (match_operand:SI 2 "nonimmediate_operand" "c")
+ (match_operand:SI 2 "register_operand" "c")
(match_operand:SI 3 "const_int_operand" "n")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1"
- "#"
- "&& 1"
- [(parallel [(set (match_dup 0)
- (ashift:SWI48 (match_dup 1) (match_dup 2)))
- (clobber (reg:CC FLAGS_REG))])]
{
- if (can_create_pseudo_p ())
- operands [2] = force_reg (SImode, operands[2]);
-
- operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+ return "sal{<imodesuffix>}\t{%b2, %0|%0, %b2}";
}
[(set_attr "type" "ishift")
(set_attr "mode" "<MODE>")])
@@ -9646,28 +9638,20 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Avoid useless masking of count operand.
-(define_insn_and_split "*<shift_insn><mode>3_mask"
+(define_insn "*<shift_insn><mode>3_mask"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
(any_shiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0")
(subreg:QI
(and:SI
- (match_operand:SI 2 "nonimmediate_operand" "c")
+ (match_operand:SI 2 "register_operand" "c")
(match_operand:SI 3 "const_int_operand" "n")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1"
- "#"
- "&& 1"
- [(parallel [(set (match_dup 0)
- (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))
- (clobber (reg:CC FLAGS_REG))])]
{
- if (can_create_pseudo_p ())
- operands [2] = force_reg (SImode, operands[2]);
-
- operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+ return "<shift>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
}
[(set_attr "type" "ishift")
(set_attr "mode" "<MODE>")])
@@ -10109,28 +10093,20 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Avoid useless masking of count operand.
-(define_insn_and_split "*<rotate_insn><mode>3_mask"
+(define_insn "*<rotate_insn><mode>3_mask"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
(any_rotate:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0")
(subreg:QI
(and:SI
- (match_operand:SI 2 "nonimmediate_operand" "c")
+ (match_operand:SI 2 "register_operand" "c")
(match_operand:SI 3 "const_int_operand" "n")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
== GET_MODE_BITSIZE (<MODE>mode)-1"
- "#"
- "&& 1"
- [(parallel [(set (match_dup 0)
- (any_rotate:SWI48 (match_dup 1) (match_dup 2)))
- (clobber (reg:CC FLAGS_REG))])]
{
- if (can_create_pseudo_p ())
- operands [2] = force_reg (SImode, operands[2]);
-
- operands[2] = simplify_gen_subreg (QImode, operands[2], SImode, 0);
+ return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
}
[(set_attr "type" "rotate")
(set_attr "mode" "<MODE>")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e2bb68ba18f..585dc71038c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5638,7 +5638,7 @@
if (TARGET_SSE4_1)
{
if (CONSTANT_P (operands[2]))
- operands[2] = force_const_mem (<MODE>mode, operands[2]);
+ operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[2]));
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
}
else
@@ -9942,7 +9942,8 @@
(match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
"TARGET_XOP"
{
- operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
+ operands[3]
+ = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
[(set_attr "type" "sseishft")
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 0d39483d908..dc6f2e42bcf 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -792,7 +792,9 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
/* Extract CODE_LABEL. */
orig = XEXP (orig, 0);
add_reg_note (insn, REG_LABEL_OPERAND, orig);
- LABEL_NUSES (orig)++;
+ /* Make sure we have label and not a note. */
+ if (LABEL_P (orig))
+ LABEL_NUSES (orig)++;
}
crtl->uses_pic_offset_table = 1;
return reg;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index dcb1f64d4ee..3fafd9b278c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -208,8 +208,8 @@
;; VSX moves
(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,W,v,wZ"))]
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
"VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
@@ -239,23 +239,24 @@
case 6:
case 7:
case 8:
+ case 11:
return "#";
case 9:
case 10:
return "xxlxor %x0,%x0,%x0";
- case 11:
+ case 12:
return output_vec_const_move (operands);
- case 12:
+ case 13:
gcc_assert (MEM_P (operands[0])
&& GET_CODE (XEXP (operands[0], 0)) != PRE_INC
&& GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
&& GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
return "stvx %1,%y0";
- case 13:
+ case 14:
gcc_assert (MEM_P (operands[0])
&& GET_CODE (XEXP (operands[0], 0)) != PRE_INC
&& GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
@@ -266,7 +267,7 @@
gcc_unreachable ();
}
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
;; Unlike other VSX moves, allow the GPRs, since a normal use of TImode is for
;; unions. However for plain data movement, slightly favor the vector loads
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 7e87dcd547d..ac33371937e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -4647,6 +4647,9 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
int smode_bsize, mode_bsize;
rtx op, clobber;
+ if (bitsize + bitpos > GET_MODE_SIZE (mode))
+ return false;
+
/* Generate INSERT IMMEDIATE (IILL et al). */
/* (set (ze (reg)) (const_int)). */
if (TARGET_ZARCH
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 42ef5e142d8..8fb3f8a8b0d 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -689,7 +689,7 @@
;; Extract contiguous bits and compare them against zero.
(define_insn "tstsi_t_zero_extract_eq"
[(set (reg:SI T_REG)
- (eq:SI (zero_extract:SI (match_operand 0 "logical_operand" "z")
+ (eq:SI (zero_extract:SI (match_operand:SI 0 "logical_operand" "z")
(match_operand:SI 1 "const_int_operand")
(match_operand:SI 2 "const_int_operand"))
(const_int 0)))]
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 08c2894fbbc..fb01ae92a2a 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -4321,13 +4321,14 @@ mem_min_alignment (rtx mem, int desired)
mapped into one sparc_mode_class mode. */
enum sparc_mode_class {
- S_MODE, D_MODE, T_MODE, O_MODE,
+ H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
SF_MODE, DF_MODE, TF_MODE, OF_MODE,
CC_MODE, CCFP_MODE
};
/* Modes for single-word and smaller quantities. */
-#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
+#define S_MODES \
+ ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
/* Modes for double-word and smaller quantities. */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
@@ -4338,13 +4339,11 @@ enum sparc_mode_class {
/* Modes for 8-word and smaller quantities. */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
-/* Modes for single-float quantities. We must allow any single word or
- smaller quantity. This is because the fix/float conversion instructions
- take integer inputs/outputs from the float registers. */
-#define SF_MODES (S_MODES)
+/* Modes for single-float quantities. */
+#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
/* Modes for double-float and smaller quantities. */
-#define DF_MODES (D_MODES)
+#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
/* Modes for quad-float and smaller quantities. */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
@@ -4440,7 +4439,9 @@ sparc_init_modes (void)
case MODE_INT:
case MODE_PARTIAL_INT:
case MODE_COMPLEX_INT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (i) < 4)
+ sparc_mode_class[i] = 1 << (int) H_MODE;
+ else if (GET_MODE_SIZE (i) == 4)
sparc_mode_class[i] = 1 << (int) S_MODE;
else if (GET_MODE_SIZE (i) == 8)
sparc_mode_class[i] = 1 << (int) D_MODE;
@@ -4452,14 +4453,16 @@ sparc_init_modes (void)
sparc_mode_class[i] = 0;
break;
case MODE_VECTOR_INT:
- if (GET_MODE_SIZE (i) <= 4)
- sparc_mode_class[i] = 1 << (int)SF_MODE;
+ if (GET_MODE_SIZE (i) == 4)
+ sparc_mode_class[i] = 1 << (int) SF_MODE;
else if (GET_MODE_SIZE (i) == 8)
- sparc_mode_class[i] = 1 << (int)DF_MODE;
+ sparc_mode_class[i] = 1 << (int) DF_MODE;
+ else
+ sparc_mode_class[i] = 0;
break;
case MODE_FLOAT:
case MODE_COMPLEX_FLOAT:
- if (GET_MODE_SIZE (i) <= 4)
+ if (GET_MODE_SIZE (i) == 4)
sparc_mode_class[i] = 1 << (int) SF_MODE;
else if (GET_MODE_SIZE (i) == 8)
sparc_mode_class[i] = 1 << (int) DF_MODE;
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 6b02b45a24c..c6122c115cd 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -327,6 +327,8 @@ extern enum cmodel sparc_cmodel;
%{mcpu=sparclite86x:-Asparclite} \
%{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
%{mcpu=v8:-Av8} \
+%{mcpu=supersparc:-Av8} \
+%{mcpu=hypersparc:-Av8} \
%{mcpu=leon:-Av8} \
%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \