author     Yvan Roux <yvan.roux@linaro.org>               2015-10-06 19:59:52 +0200
committer  Linaro Code Review <review@review.linaro.org>  2015-10-08 16:22:06 +0000
commit     a1f430e5371a8c5b7dc92850333e573b8f4cd9c5
tree       bd44cd7e670d1d5379659214765df5b3805ecb8a
parent     c347a8109bc1ff9ea0c2c09d26ea1916a77a2b2d
gcc/
    Backport from trunk r224519.
    2015-06-16  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64-arches.def: Add "armv8.1-a".
    * config/aarch64/aarch64-option-extensions.def: Update "fp",
    "simd" and "crypto".  Add "lse", "pan", "lor" and "rdma".
    * gcc/config/aarch64/aarch64.h (AARCH64_FL_LSE): New.
    (AARCH64_FL_PAN): New.
    (AARCH64_FL_LOR): New.
    (AARCH64_FL_RDMA): New.
    (AARCH64_FL_FOR_ARCH8_1): New.
    * doc/invoke.texi (AArch64 Options): Add "armv8.1-a" to -march.
    Add "lse", "pan", "lor", "rdma" to feature modifiers.

gcc/
    Backport from trunk r225118.
    2015-06-29  Matthew Wahab  <matthew.wahab@arm.com>

    * doc/invoke.texi (Aarch64 Options, -march): Split out arch and
    feature description, split out the native option, add a link to
    the feature documentation, rearrange and slightly rewrite text.
    (Aarch64 options, -mcpu): Likewise.
    (Aarch64 options, Feature Modifiers): Add an anchor.  Mention
    +rdma implies Adv. SIMD.

gcc/
    Backport from trunk r226857.
    2015-08-13  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64.h (AARCH64_ISA_LSE): New.
    (TARGET_LSE): New.

gcc/
    Backport from trunk r226858.
    2015-08-13  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_cas): Declare.
    * config/aarch64/aarch64.c (aarch64_expand_compare_and_swap):
    Choose appropriate instruction pattern for the target.
    (aarch64_gen_atomic_cas): New.
    * config/aarch64/atomics.md (UNSPECV_ATOMIC_CAS): New.
    (atomic_compare_and_swap<mode>_1): Rename to
    aarch64_compare_and_swap<mode>.  Fix some indentation.
    (aarch64_compare_and_swap<mode>_lse): New.
    (aarch64_atomic_cas<mode>): New.

gcc/testsuite/
    Backport from trunk r226859.
    2015-08-13  Matthew Wahab  <matthew.wahab@arm.com>

    * gcc.target/aarch64/atomic-inst-cas.c: New.
    * gcc.target/aarch64/atomic-inst-ops.inc: New.

gcc/
    Backport from trunk r227998.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
    Declare.
    * config/aarch64/aarch64.c (aarch64_emit_atomic_swap): New.
    (aarch64_gen_atomic_ldop): New.
    (aarch64_split_atomic_op): Fix whitespace and add a comment.
    * config/aarch64/atomics.md (UNSPECV_ATOMIC_SWP): New.
    (aarch64_compare_and_swap<mode>_lse): Fix some whitespace.
    (atomic_exchange<mode>): Replace with an expander.
    (aarch64_atomic_exchange<mode>): New.
    (aarch64_atomic_exchange<mode>_lse): New.
    (aarch64_atomic_<atomic_optab><mode>): Fix some whitespace.
    (aarch64_atomic_swp<mode>): New.

gcc/testsuite/
    Backport from trunk r227998.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * gcc.target/aarch64/atomic-inst-ops.inc (TEST_MODEL): New.
    (TEST_ONE): New.
    * gcc.target/aarch64/atomic-inst-swp.c: New.

gcc/
    Backport from trunk r227999.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64.md
    (<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3): Make a named
    pattern.

gcc/
    Backport from trunk r228000.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/atomics.md (UNSPECV_ATOMIC_LDOP): New.
    (UNSPECV_ATOMIC_LDOP_OR): New.
    (UNSPECV_ATOMIC_LDOP_BIC): New.
    (UNSPECV_ATOMIC_LDOP_XOR): New.
    (UNSPECV_ATOMIC_LDOP_PLUS): New.
    (ATOMIC_LDOP): New.
    (atomic_ldop): New.
    (aarch64_atomic_load<atomic_ldop><mode>): New.

gcc/
    Backport from trunk r228001.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64-protos.h
    (aarch64_atomic_ldop_supported_p): Declare.
    * config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
    (enum aarch64_atomic_load_op_code): New.
    (aarch64_emit_atomic_load_op): New.
    (aarch64_gen_atomic_ldop): Update to support load-operate
    patterns.
    * config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
    to an expander.
    (aarch64_atomic_<atomic_optab><mode>): New.
    (aarch64_atomic_<atomic_optab><mode>_lse): New.
    (atomic_fetch_<atomic_optab><mode>): Change to an expander.
    (aarch64_atomic_fetch_<atomic_optab><mode>): New.
    (aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

gcc/testsuite/
    Backport from trunk r228001.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * gcc.target/aarch64/atomic-inst-ldadd.c: New.
    * gcc.target/aarch64/atomic-inst-ldlogic.c: New.

gcc/
    Backport from trunk r228002.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * config/aarch64/aarch64-protos.h (aarch64_gen_atomic_ldop):
    Adjust declaration.
    * config/aarch64/aarch64.c (aarch64_emit_bic): New.
    (aarch64_gen_atomic_ldop): Adjust comment.  Add parameter
    out_result.  Update to support update-fetch operations.
    * config/aarch64/atomics.md (aarch64_atomic_exchange<mode>_lse):
    Adjust for change to aarch64_gen_atomic_ldop.
    (aarch64_atomic_<atomic_optab><mode>_lse): Likewise.
    (aarch64_atomic_fetch_<atomic_optab><mode>_lse): Likewise.
    (atomic_<atomic_optab>_fetch<mode>): Change to an expander.
    (aarch64_atomic_<atomic_optab>_fetch<mode>): New.
    (aarch64_atomic_<atomic_optab>_fetch<mode>_lse): New.

gcc/testsuite/
    Backport from trunk r228002.
    2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

    * gcc.target/aarch64/atomic-inst-ldadd.c: Add tests for
    update-fetch operations.
    * gcc.target/aarch64/atomic-inst-ldlogic.c: Likewise.

Change-Id: I4a24400be8d994e4a2dc79aff2c1e5b8d68adaed
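For context, a minimal C sketch of the kind of source this series targets (illustrative only, not part of the patch): with -march=armv8-a+lse or -march=armv8.1-a, the __atomic builtins below are expected to expand to the single LSE instructions named in the comments (taken from the sequentially-consistent output templates added to atomics.md), instead of the LDXR/STLXR retry loops emitted without the extension.

#include <stdint.h>

uint64_t
fetch_add (uint64_t *p, uint64_t v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);   /* ldaddal */
}

uint64_t
exchange (uint64_t *p, uint64_t v)
{
  return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);  /* swpal */
}

int
compare_exchange (uint64_t *p, uint64_t *expected, uint64_t desired)
{
  return __atomic_compare_exchange_n (p, expected, desired, 0,
                                      __ATOMIC_SEQ_CST,
                                      __ATOMIC_SEQ_CST); /* casal */
}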
-rw-r--r--  gcc/config/aarch64/aarch64-arches.def                    |    1
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def         |   10
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h                      |    4
-rw-r--r--  gcc/config/aarch64/aarch64.c                             |  341
-rw-r--r--  gcc/config/aarch64/aarch64.h                             |   12
-rw-r--r--  gcc/config/aarch64/aarch64.md                            |    2
-rw-r--r--  gcc/config/aarch64/atomics.md                            |  375
-rw-r--r--  gcc/doc/invoke.texi                                      |   83
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c       |   61
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c     |   87
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c   |  155
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc     |   66
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c       |   44
13 files changed, 1176 insertions, 65 deletions
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index bf4e185cda9..abbfce66bfe 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -27,3 +27,4 @@
the flags implied by the architecture. */
AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8)
+AARCH64_ARCH("armv8.1-a", generic, 8, AARCH64_FL_FOR_ARCH8_1)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index f296296e9db..1762cc8d58f 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -39,7 +39,11 @@
AArch64, and therefore serves as a template for adding more CPUs in the
future. */
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp")
-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd")
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "fp")
+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, "asimd")
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2")
AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32")
+AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, AARCH64_FL_LSE, "lse")
+AARCH64_OPT_EXTENSION("pan", AARCH64_FL_PAN, AARCH64_FL_PAN, "pan")
+AARCH64_OPT_EXTENSION("lor", AARCH64_FL_LOR, AARCH64_FL_LOR, "lor")
+AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA | AARCH64_FL_FPSIMD, AARCH64_FL_RDMA, "rdma")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 965a11b7bee..2bb59b1bcb4 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -300,6 +300,10 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
+void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
+
+bool aarch64_atomic_ldop_supported_p (enum rtx_code);
+void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index aaa825ce32f..3851434cf86 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9362,7 +9362,23 @@ aarch64_expand_compare_and_swap (rtx operands[])
{
rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
machine_mode mode, cmp_mode;
- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+ typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+ int idx;
+ gen_cas_fn gen;
+ const gen_cas_fn split_cas[] =
+ {
+ gen_aarch64_compare_and_swapqi,
+ gen_aarch64_compare_and_swaphi,
+ gen_aarch64_compare_and_swapsi,
+ gen_aarch64_compare_and_swapdi
+ };
+ const gen_cas_fn atomic_cas[] =
+ {
+ gen_aarch64_compare_and_swapqi_lse,
+ gen_aarch64_compare_and_swaphi_lse,
+ gen_aarch64_compare_and_swapsi_lse,
+ gen_aarch64_compare_and_swapdi_lse
+ };
bval = operands[0];
rval = operands[1];
@@ -9407,13 +9423,17 @@ aarch64_expand_compare_and_swap (rtx operands[])
switch (mode)
{
- case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
- case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
- case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
- case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+ case QImode: idx = 0; break;
+ case HImode: idx = 1; break;
+ case SImode: idx = 2; break;
+ case DImode: idx = 3; break;
default:
gcc_unreachable ();
}
+ if (TARGET_LSE)
+ gen = atomic_cas[idx];
+ else
+ gen = split_cas[idx];
emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
@@ -9425,6 +9445,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
+/* Test whether the target supports using an atomic load-operate instruction.
+ CODE is the operation and AFTER is TRUE if the data in memory after the
+ operation should be returned and FALSE if the data before the operation
+ should be returned. Returns FALSE if the operation isn't supported by the
+ architecture. */
+
+bool
+aarch64_atomic_ldop_supported_p (enum rtx_code code)
+{
+ if (!TARGET_LSE)
+ return false;
+
+ switch (code)
+ {
+ case SET:
+ case AND:
+ case IOR:
+ case XOR:
+ case MINUS:
+ case PLUS:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
sequence implementing an atomic operation. */
@@ -9442,6 +9488,42 @@ aarch64_emit_post_barrier (enum memmodel model)
}
}
+/* Emit an atomic compare-and-swap operation. RVAL is the destination register
+ for the data in memory. EXPECTED is the value expected to be in memory.
+ DESIRED is the value to store to memory. MEM is the memory location. MODEL
+ is the memory ordering to use. */
+
+void
+aarch64_gen_atomic_cas (rtx rval, rtx mem,
+ rtx expected, rtx desired,
+ rtx model)
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ machine_mode mode;
+
+ mode = GET_MODE (mem);
+
+ switch (mode)
+ {
+ case QImode: gen = gen_aarch64_atomic_casqi; break;
+ case HImode: gen = gen_aarch64_atomic_cashi; break;
+ case SImode: gen = gen_aarch64_atomic_cassi; break;
+ case DImode: gen = gen_aarch64_atomic_casdi; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Move the expected value into the CAS destination register. */
+ emit_insn (gen_rtx_SET (VOIDmode, rval, expected));
+
+ /* Emit the CAS. */
+ emit_insn (gen (rval, mem, desired, model));
+
+ /* Compare the expected value with the value loaded by the CAS, to establish
+ whether the swap was made. */
+ aarch64_gen_compare_reg (EQ, rval, expected);
+}
+
/* Split a compare and swap pattern. */
void
@@ -9510,11 +9592,257 @@ aarch64_split_compare_and_swap (rtx operands[])
aarch64_emit_post_barrier (model);
}
+/* Emit a BIC instruction. */
+
+static void
+aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
+{
+ rtx shift_rtx = GEN_INT (shift);
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
+ case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (dst, s2, shift_rtx, s1));
+}
+
+/* Emit an atomic swap. */
+
+static void
+aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
+ rtx mem, rtx model)
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case QImode: gen = gen_aarch64_atomic_swpqi; break;
+ case HImode: gen = gen_aarch64_atomic_swphi; break;
+ case SImode: gen = gen_aarch64_atomic_swpsi; break;
+ case DImode: gen = gen_aarch64_atomic_swpdi; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (dst, mem, value, model));
+}
+
+/* Operations supported by aarch64_emit_atomic_load_op. */
+
+enum aarch64_atomic_load_op_code
+{
+ AARCH64_LDOP_PLUS, /* A + B */
+ AARCH64_LDOP_XOR, /* A ^ B */
+ AARCH64_LDOP_OR, /* A | B */
+ AARCH64_LDOP_BIC /* A & ~B */
+};
+
+/* Emit an atomic load-operate. */
+
+static void
+aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
+ machine_mode mode, rtx dst, rtx src,
+ rtx mem, rtx model)
+{
+ typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
+ const aarch64_atomic_load_op_fn plus[] =
+ {
+ gen_aarch64_atomic_loadaddqi,
+ gen_aarch64_atomic_loadaddhi,
+ gen_aarch64_atomic_loadaddsi,
+ gen_aarch64_atomic_loadadddi
+ };
+ const aarch64_atomic_load_op_fn eor[] =
+ {
+ gen_aarch64_atomic_loadeorqi,
+ gen_aarch64_atomic_loadeorhi,
+ gen_aarch64_atomic_loadeorsi,
+ gen_aarch64_atomic_loadeordi
+ };
+ const aarch64_atomic_load_op_fn ior[] =
+ {
+ gen_aarch64_atomic_loadsetqi,
+ gen_aarch64_atomic_loadsethi,
+ gen_aarch64_atomic_loadsetsi,
+ gen_aarch64_atomic_loadsetdi
+ };
+ const aarch64_atomic_load_op_fn bic[] =
+ {
+ gen_aarch64_atomic_loadclrqi,
+ gen_aarch64_atomic_loadclrhi,
+ gen_aarch64_atomic_loadclrsi,
+ gen_aarch64_atomic_loadclrdi
+ };
+ aarch64_atomic_load_op_fn gen;
+ int idx = 0;
+
+ switch (mode)
+ {
+ case QImode: idx = 0; break;
+ case HImode: idx = 1; break;
+ case SImode: idx = 2; break;
+ case DImode: idx = 3; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (code)
+ {
+ case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
+ case AARCH64_LDOP_XOR: gen = eor[idx]; break;
+ case AARCH64_LDOP_OR: gen = ior[idx]; break;
+ case AARCH64_LDOP_BIC: gen = bic[idx]; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (dst, mem, src, model));
+}
+
+/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
+ location to store the data read from memory. OUT_RESULT is the location to
+ store the result of the operation. MEM is the memory location to read and
+ modify. MODEL_RTX is the memory ordering to use. VALUE is the second
+ operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
+ be NULL. */
+
+void
+aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
+ rtx mem, rtx value, rtx model_rtx)
+{
+ machine_mode mode = GET_MODE (mem);
+ machine_mode wmode = (mode == DImode ? DImode : SImode);
+ const bool short_mode = (mode < SImode);
+ aarch64_atomic_load_op_code ldop_code;
+ rtx src;
+ rtx x;
+
+ if (out_data)
+ out_data = gen_lowpart (mode, out_data);
+
+ if (out_result)
+ out_result = gen_lowpart (mode, out_result);
+
+ /* Make sure the value is in a register, putting it into a destination
+ register if it needs to be manipulated. */
+ if (!register_operand (value, mode)
+ || code == AND || code == MINUS)
+ {
+ src = out_result ? out_result : out_data;
+ emit_move_insn (src, gen_lowpart (mode, value));
+ }
+ else
+ src = value;
+ gcc_assert (register_operand (src, mode));
+
+ /* Preprocess the data for the operation as necessary. If the operation is
+ a SET then emit a swap instruction and finish. */
+ switch (code)
+ {
+ case SET:
+ aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
+ return;
+
+ case MINUS:
+ /* Negate the value and treat it as a PLUS. */
+ {
+ rtx neg_src;
+
+ /* Resize the value if necessary. */
+ if (short_mode)
+ src = gen_lowpart (wmode, src);
+
+ neg_src = gen_rtx_NEG (wmode, src);
+ emit_insn (gen_rtx_SET (VOIDmode, src, neg_src));
+
+ if (short_mode)
+ src = gen_lowpart (mode, src);
+ }
+ /* Fall-through. */
+ case PLUS:
+ ldop_code = AARCH64_LDOP_PLUS;
+ break;
+
+ case IOR:
+ ldop_code = AARCH64_LDOP_OR;
+ break;
+
+ case XOR:
+ ldop_code = AARCH64_LDOP_XOR;
+ break;
+
+ case AND:
+ {
+ rtx not_src;
+
+ /* Resize the value if necessary. */
+ if (short_mode)
+ src = gen_lowpart (wmode, src);
+
+ not_src = gen_rtx_NOT (wmode, src);
+ emit_insn (gen_rtx_SET (VOIDmode, src, not_src));
+
+ if (short_mode)
+ src = gen_lowpart (mode, src);
+ }
+ ldop_code = AARCH64_LDOP_BIC;
+ break;
+
+ default:
+ /* The operation can't be done with atomic instructions. */
+ gcc_unreachable ();
+ }
+
+ aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
+
+ /* If necessary, calculate the data in memory after the update by redoing the
+ operation from values in registers. */
+ if (!out_result)
+ return;
+
+ if (short_mode)
+ {
+ src = gen_lowpart (wmode, src);
+ out_data = gen_lowpart (wmode, out_data);
+ out_result = gen_lowpart (wmode, out_result);
+ }
+
+ x = NULL_RTX;
+
+ switch (code)
+ {
+ case MINUS:
+ case PLUS:
+ x = gen_rtx_PLUS (wmode, out_data, src);
+ break;
+ case IOR:
+ x = gen_rtx_IOR (wmode, out_data, src);
+ break;
+ case XOR:
+ x = gen_rtx_XOR (wmode, out_data, src);
+ break;
+ case AND:
+ aarch64_emit_bic (wmode, out_result, out_data, src, 0);
+ return;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_set_insn (out_result, x);
+
+ return;
+}
+
/* Split an atomic operation. */
void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
- rtx value, rtx model_rtx, rtx cond)
+ rtx value, rtx model_rtx, rtx cond)
{
machine_mode mode = GET_MODE (mem);
machine_mode wmode = (mode == DImode ? DImode : SImode);
@@ -9523,6 +9851,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
rtx_code_label *label;
rtx x;
+ /* Split the atomic operation into a sequence. */
label = gen_label_rtx ();
emit_label (label);
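A note on the AND handling in aarch64_gen_atomic_ldop above: LSE has no load-AND instruction, only LDCLR (load, clear the bits set in the operand), so the code complements the value first and relies on the identity a & v == a & ~(~v). A stand-alone C sketch of that identity (illustrative only, not GCC code):

#include <assert.h>
#include <stdint.h>

/* LDCLR atomically performs *p = *p & ~mask and returns the old *p.
   An atomic AND with VALUE is therefore emitted as LDCLR with ~VALUE.  */
static uint64_t
and_via_ldclr (uint64_t old, uint64_t value)
{
  uint64_t mask = ~value;   /* the NOT emitted before the LDCLR */
  return old & ~mask;       /* what the LDCLR stores back */
}

int
main (void)
{
  assert (and_via_ldclr (0xf0f0, 0x00ff) == (0xf0f0 & 0x00ff));
  return 0;
}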
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 25b9927ab3c..ee49d27723c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -201,6 +201,11 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */
/* Has static dispatch of FMA. */
#define AARCH64_FL_USE_FMA_STEERING_PASS (1 << 4)
+/* ARMv8.1 architecture extensions. */
+#define AARCH64_FL_LSE (1 << 5) /* Has Large System Extensions. */
+#define AARCH64_FL_PAN (1 << 6) /* Has Privileged Access Never. */
+#define AARCH64_FL_LOR (1 << 7) /* Has Limited Ordering regions. */
+#define AARCH64_FL_RDMA (1 << 8) /* Has ARMv8.1 Adv.SIMD. */
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -210,6 +215,9 @@ extern unsigned aarch64_architecture_version;
/* Architecture flags that effect instruction selection. */
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
+#define AARCH64_FL_FOR_ARCH8_1 \
+ (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \
+ | AARCH64_FL_LOR | AARCH64_FL_RDMA)
/* Macros to test ISA flags. */
extern unsigned long aarch64_isa_flags;
@@ -217,6 +225,7 @@ extern unsigned long aarch64_isa_flags;
#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO)
#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP)
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
+#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
/* Macros to test tuning flags. */
extern unsigned long aarch64_tune_flags;
@@ -229,6 +238,9 @@ extern unsigned long aarch64_tune_flags;
/* CRC instructions that can be enabled through +crc arch extension. */
#define TARGET_CRC32 (AARCH64_ISA_CRC)
+/* Atomic instructions that can be enabled through the +lse extension. */
+#define TARGET_LSE (AARCH64_ISA_LSE)
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
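To show how the new flag bits fit together, a stand-alone sketch (not the real aarch64.h; AARCH64_FL_FOR_ARCH8 is stubbed to zero here): selecting the ARMv8.1-A flag set turns on the LSE bit, which is exactly what TARGET_LSE tests.

#include <stdio.h>

#define AARCH64_FL_FOR_ARCH8   0        /* stub for the ARMv8-A base flags */
#define AARCH64_FL_LSE         (1 << 5)
#define AARCH64_FL_PAN         (1 << 6)
#define AARCH64_FL_LOR         (1 << 7)
#define AARCH64_FL_RDMA        (1 << 8)
#define AARCH64_FL_FOR_ARCH8_1 \
  (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_PAN \
   | AARCH64_FL_LOR | AARCH64_FL_RDMA)

static unsigned long aarch64_isa_flags = AARCH64_FL_FOR_ARCH8_1;

#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
#define TARGET_LSE      (AARCH64_ISA_LSE)

int
main (void)
{
  printf ("TARGET_LSE = %d\n", TARGET_LSE != 0);  /* prints 1 */
  return 0;
}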
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8dd0b02a47f..e234000db0e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3262,7 +3262,7 @@
[(set_attr "type" "logics_reg")]
)
-(define_insn "*<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3"
+(define_insn "<LOGICAL:optab>_one_cmpl_<SHIFT:optab><mode>3"
[(set (match_operand:GPI 0 "register_operand" "=r")
(LOGICAL:GPI (not:GPI
(SHIFT:GPI
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 6e6be99a300..e7ac5f6fc1c 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -26,9 +26,28 @@
UNSPECV_STL ; Represent an atomic store or store-release.
UNSPECV_ATOMIC_CMPSW ; Represent an atomic compare swap.
UNSPECV_ATOMIC_EXCHG ; Represent an atomic exchange.
+ UNSPECV_ATOMIC_CAS ; Represent an atomic CAS.
+ UNSPECV_ATOMIC_SWP ; Represent an atomic SWP.
UNSPECV_ATOMIC_OP ; Represent an atomic operation.
+ UNSPECV_ATOMIC_LDOP ; Represent an atomic load-operation
+ UNSPECV_ATOMIC_LDOP_OR ; Represent an atomic load-or
+ UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
+ UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
+ UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add
])
+;; Iterators for load-operate instructions.
+
+(define_int_iterator ATOMIC_LDOP
+ [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC
+ UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS])
+
+(define_int_attr atomic_ldop
+ [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
+ (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+
+;; Instruction patterns.
+
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand" "") ;; bool out
(match_operand:ALLI 1 "register_operand" "") ;; val out
@@ -45,10 +64,10 @@
}
)
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
[(set (reg:CC CC_REGNUM) ;; bool out
(unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
- (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
+ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
(zero_extend:SI
(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
(set (match_dup 1)
@@ -57,7 +76,7 @@
(match_operand:SHORT 3 "register_operand" "r") ;; desired
(match_operand:SI 4 "const_int_operand") ;; is_weak
(match_operand:SI 5 "const_int_operand") ;; mod_s
- (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPECV_ATOMIC_CMPSW))
(clobber (match_scratch:SI 7 "=&r"))]
""
@@ -70,17 +89,17 @@
}
)
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
[(set (reg:CC CC_REGNUM) ;; bool out
(unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
(set (match_operand:GPI 0 "register_operand" "=&r") ;; val out
- (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
(set (match_dup 1)
(unspec_volatile:GPI
[(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
(match_operand:GPI 3 "register_operand" "r") ;; desired
- (match_operand:SI 4 "const_int_operand") ;; is_weak
- (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
(match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPECV_ATOMIC_CMPSW))
(clobber (match_scratch:SI 7 "=&r"))]
@@ -94,7 +113,79 @@
}
)
-(define_insn_and_split "atomic_exchange<mode>"
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+ [(set (reg:CC CC_REGNUM) ;; bool out
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+ (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
+ (zero_extend:SI
+ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:SHORT
+ [(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected
+ (match_operand:SHORT 3 "register_operand" "r") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ UNSPECV_ATOMIC_CMPSW))]
+ "TARGET_LSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_gen_atomic_cas (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[5]);
+ DONE;
+ }
+)
+
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+ [(set (reg:CC CC_REGNUM) ;; bool out
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+ (set (match_operand:GPI 0 "register_operand" "=&r") ;; val out
+ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+ (set (match_dup 1)
+ (unspec_volatile:GPI
+ [(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect
+ (match_operand:GPI 3 "register_operand" "r") ;; desired
+ (match_operand:SI 4 "const_int_operand") ;; is_weak
+ (match_operand:SI 5 "const_int_operand") ;; mod_s
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
+ UNSPECV_ATOMIC_CMPSW))]
+ "TARGET_LSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_gen_atomic_cas (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[5]);
+ DONE;
+ }
+)
+
+(define_expand "atomic_exchange<mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+ (match_operand:ALLI 2 "register_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ ""
+ {
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+
+ /* Use an atomic SWP when available. */
+ if (TARGET_LSE)
+ gen = gen_aarch64_atomic_exchange<mode>_lse;
+ else
+ gen = gen_aarch64_atomic_exchange<mode>;
+
+ emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
+
+ DONE;
+ }
+)
+
+(define_insn_and_split "aarch64_atomic_exchange<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r") ;; output
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
(set (match_dup 1)
@@ -110,28 +201,87 @@
[(const_int 0)]
{
aarch64_split_atomic_op (SET, operands[0], NULL, operands[1],
- operands[2], operands[3], operands[4]);
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }
+)
+
+(define_insn_and_split "aarch64_atomic_exchange<mode>_lse"
+ [(set (match_operand:ALLI 0 "register_operand" "=&r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:ALLI
+ [(match_operand:ALLI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "")]
+ UNSPECV_ATOMIC_EXCHG))]
+ "TARGET_LSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_gen_atomic_ldop (SET, operands[0], NULL, operands[1],
+ operands[2], operands[3]);
+ DONE;
+ }
+)
+
+(define_expand "atomic_<atomic_optab><mode>"
+ [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
+ (atomic_op:ALLI
+ (match_operand:ALLI 1 "<atomic_op_operand>" "")
+ (match_operand:SI 2 "const_int_operand"))]
+ ""
+ {
+ rtx (*gen) (rtx, rtx, rtx);
+
+ /* Use an atomic load-operate instruction when possible. */
+ if (aarch64_atomic_ldop_supported_p (<CODE>))
+ gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
+ else
+ gen = gen_aarch64_atomic_<atomic_optab><mode>;
+
+ emit_insn (gen (operands[0], operands[1], operands[2]));
+
DONE;
}
)
-(define_insn_and_split "atomic_<atomic_optab><mode>"
+(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>"
+ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+ (unspec_volatile:ALLI
+ [(atomic_op:ALLI (match_dup 0)
+ (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
+ (match_operand:SI 2 "const_int_operand")]
+ UNSPECV_ATOMIC_OP))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:ALLI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+ operands[1], operands[2], operands[4]);
+ DONE;
+ }
+)
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
- (match_operand:SI 2 "const_int_operand")] ;; model
+ (match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
- (clobber (reg:CC CC_REGNUM))
- (clobber (match_scratch:ALLI 3 "=&r"))
- (clobber (match_scratch:SI 4 "=&r"))]
- ""
+ (clobber (match_scratch:ALLI 3 "=&r"))]
+ "TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
- aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
- operands[1], operands[2], operands[4]);
+ aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
+ operands[1], operands[2]);
DONE;
}
)
@@ -158,7 +308,30 @@
}
)
-(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
+;; Load-operate-store, returning the updated memory data.
+
+(define_expand "atomic_fetch_<atomic_optab><mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+ (atomic_op:ALLI
+ (match_operand:ALLI 2 "<atomic_op_operand>" "")
+ (match_operand:SI 3 "const_int_operand"))]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+
+ /* Use an atomic load-operate instruction when possible. */
+ if (aarch64_atomic_ldop_supported_p (<CODE>))
+ gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
+ else
+ gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
+
+ emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
+
+ DONE;
+})
+
+(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
@@ -181,6 +354,26 @@
}
)
+(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
+ [(set (match_operand:ALLI 0 "register_operand" "=&r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:ALLI
+ [(atomic_op:ALLI (match_dup 1)
+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPECV_ATOMIC_LDOP))]
+ "TARGET_LSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
+ operands[2], operands[3]);
+ DONE;
+ }
+)
+
(define_insn_and_split "atomic_fetch_nand<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
@@ -205,7 +398,31 @@
}
)
-(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+;; Load-operate-store, returning the original memory data.
+
+(define_expand "atomic_<atomic_optab>_fetch<mode>"
+ [(match_operand:ALLI 0 "register_operand" "")
+ (atomic_op:ALLI
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
+ (match_operand:ALLI 2 "<atomic_op_operand>" ""))
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+{
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx value = operands[2];
+
+ /* Use an atomic load-operate instruction when possible. */
+ if (aarch64_atomic_ldop_supported_p (<CODE>))
+ gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+ else
+ gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
+
+ emit_insn (gen (operands[0], operands[1], value, operands[3]));
+
+ DONE;
+})
+
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(atomic_op:ALLI
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
@@ -228,6 +445,29 @@
}
)
+(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
+ [(set (match_operand:ALLI 0 "register_operand" "=&r")
+ (atomic_op:ALLI
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+ (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
+ (set (match_dup 1)
+ (unspec_volatile:ALLI
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPECV_ATOMIC_LDOP))
+ (clobber (match_scratch:ALLI 4 "=r"))]
+ "TARGET_LSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
+ }
+)
+
(define_insn_and_split "atomic_nand_fetch<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(not:ALLI
@@ -370,3 +610,100 @@
return "dmb\\tish";
}
)
+
+;; ARMv8.1 LSE instructions.
+
+;; Atomic swap with memory.
+(define_insn "aarch64_atomic_swp<mode>"
+ [(set (match_operand:ALLI 0 "register_operand" "+&r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:ALLI
+ [(match_operand:ALLI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "")]
+ UNSPECV_ATOMIC_SWP))]
+ "TARGET_LSE && reload_completed"
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "swp<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "swpa<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_release (model))
+ return "swpl<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else
+ return "swpal<atomic_sfx>\t%<w>2, %<w>0, %1";
+ })
+
+;; Atomic compare-and-swap: HI and smaller modes.
+
+(define_insn "aarch64_atomic_cas<mode>"
+ [(set (match_operand:SI 0 "register_operand" "+&r") ;; out
+ (zero_extend:SI
+ (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory.
+ (set (match_dup 1)
+ (unspec_volatile:SHORT
+ [(match_dup 0)
+ (match_operand:SHORT 2 "register_operand" "r") ;; value.
+ (match_operand:SI 3 "const_int_operand" "")] ;; model.
+ UNSPECV_ATOMIC_CAS))]
+ "TARGET_LSE && reload_completed"
+{
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else if (is_mm_release (model))
+ return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else
+ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
+})
+
+;; Atomic compare-and-swap: SI and larger modes.
+
+(define_insn "aarch64_atomic_cas<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "+&r") ;; out
+ (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory.
+ (set (match_dup 1)
+ (unspec_volatile:GPI
+ [(match_dup 0)
+ (match_operand:GPI 2 "register_operand" "r") ;; value.
+ (match_operand:SI 3 "const_int_operand" "")] ;; model.
+ UNSPECV_ATOMIC_CAS))]
+ "TARGET_LSE && reload_completed"
+{
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else if (is_mm_release (model))
+ return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
+ else
+ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
+})
+
+;; Atomic load-op: Load data, operate, store result, keep data.
+
+(define_insn "aarch64_atomic_load<atomic_ldop><mode>"
+ [(set (match_operand:ALLI 0 "register_operand" "=r")
+ (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+ (set (match_dup 1)
+ (unspec_volatile:ALLI
+ [(match_dup 1)
+ (match_operand:ALLI 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ATOMIC_LDOP))]
+ "TARGET_LSE && reload_completed"
+ {
+ enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+ if (is_mm_relaxed (model))
+ return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_acquire (model) || is_mm_consume (model))
+ return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else if (is_mm_release (model))
+ return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
+ else
+ return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
+ })
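The output templates above all pick the mnemonic from the memory model in the same way: the bare form for relaxed, an "a" suffix for acquire/consume, "l" for release, and "al" otherwise. A hedged C illustration of what each ordering is expected to produce for a 64-bit __atomic_fetch_add (DImode, so <atomic_sfx> is empty); actual output depends on compiler version and options:

#include <stdint.h>

uint64_t add_relaxed (uint64_t *p, uint64_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_RELAXED); }  /* ldadd */

uint64_t add_acquire (uint64_t *p, uint64_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE); }  /* ldadda */

uint64_t add_release (uint64_t *p, uint64_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_RELEASE); }  /* ldaddl */

uint64_t add_seq_cst (uint64_t *p, uint64_t v)
{ return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST); }  /* ldaddal */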
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4e44d80e81f..23ec319efbc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12326,24 +12326,26 @@ corresponding flag to the linker.
@opindex march
Specify the name of the target architecture, optionally suffixed by one or
more feature modifiers. This option has the form
-@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the
-only permissible value for @var{arch} is @samp{armv8-a}.
-The permissible values for @var{feature} are documented in the sub-section
-below. Additionally on native AArch64 GNU/Linux systems the value
+@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}.
+
+The permissible values for @var{arch} are @samp{armv8-a} or
+@samp{armv8.1-a}.
+
+For the permissible values for @var{feature}, see the sub-section on
+@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu}
+Feature Modifiers}. Where conflicting feature modifiers are
+specified, the right-most feature is used.
+
+Additionally on native AArch64 GNU/Linux systems the value
@samp{native} is available. This option causes the compiler to pick the
architecture of the host system. If the compiler is unable to recognize the
architecture of the host system this option has no effect.
-Where conflicting feature modifiers are specified, the right-most feature is
-used.
-
-GCC uses this name to determine what kind of instructions it can emit when
-generating assembly code.
-
-Where @option{-march} is specified without either of @option{-mtune}
-or @option{-mcpu} also being specified, the code is tuned to perform
-well across a range of target processors implementing the target
-architecture.
+GCC uses @var{name} to determine what kind of instructions it can emit
+when generating assembly code. If @option{-march} is specified
+without either of @option{-mtune} or @option{-mcpu} also being
+specified, the code is tuned to perform well across a range of target
+processors implementing the target architecture.
@item -mtune=@var{name}
@opindex mtune
@@ -12356,12 +12358,11 @@ Additionally, this option can specify that GCC should tune the performance
of the code for a big.LITTLE system. Permissible values for this
option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}.
-Additionally on native AArch64 GNU/Linux systems the value @samp{native}
-is available.
-This option causes the compiler to pick the architecture of and tune the
-performance of the code for the processor of the host system.
-If the compiler is unable to recognize the processor of the host system
-this option has no effect.
+Additionally on native AArch64 GNU/Linux systems the value
+@samp{native} is available. This option causes the compiler to pick
+the architecture of and tune the performance of the code for the
+processor of the host system. If the compiler is unable to recognize
+the processor of the host system this option has no effect.
Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=}
are specified, the code is tuned to perform well across a range
@@ -12371,23 +12372,23 @@ This option cannot be suffixed by feature modifiers.
@item -mcpu=@var{name}
@opindex mcpu
-Specify the name of the target processor, optionally suffixed by one or more
-feature modifiers. This option has the form
-@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the
-permissible values for @var{cpu} are the same as those available for
-@option{-mtune}. Additionally on native AArch64 GNU/Linux systems the
-value @samp{native} is available.
-This option causes the compiler to tune the performance of the code for the
-processor of the host system. If the compiler is unable to recognize the
-processor of the host system this option has no effect.
-
-The permissible values for @var{feature} are documented in the sub-section
-below.
-
-Where conflicting feature modifiers are specified, the right-most feature is
-used.
+Specify the name of the target processor, optionally suffixed by one
+or more feature modifiers. This option has the form
+@option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where
+the permissible values for @var{cpu} are the same as those available
+for @option{-mtune}. The permissible values for @var{feature} are
+documented in the sub-section on
+@ref{aarch64-feature-modifiers,,@option{-march} and @option{-mcpu}
+Feature Modifiers}. Where conflicting feature modifiers are
+specified, the right-most feature is used.
+
+Additionally on native AArch64 GNU/Linux systems the value
+@samp{native} is available. This option causes the compiler to tune
+the performance of the code for the processor of the host system. If
+the compiler is unable to recognize the processor of the host system
+this option has no effect.
-GCC uses this name to determine what kind of instructions it can emit when
+GCC uses @var{name} to determine what kind of instructions it can emit when
generating assembly code (as if by @option{-march}) and to determine
the target processor for which to tune for performance (as if
by @option{-mtune}). Where this option is used in conjunction
@@ -12396,6 +12397,7 @@ over the appropriate part of this option.
@end table
@subsubsection @option{-march} and @option{-mcpu} Feature Modifiers
+@anchor{aarch64-feature-modifiers}
@cindex @option{-march} feature modifiers
@cindex @option{-mcpu} feature modifiers
Feature modifiers used with @option{-march} and @option{-mcpu} can be one
@@ -12412,6 +12414,15 @@ Enable floating-point instructions.
Enable Advanced SIMD instructions. This implies floating-point instructions
are enabled. This is the default for all current possible values for options
@option{-march} and @option{-mcpu=}.
+@item lse
+Enable Large System Extension instructions.
+@item pan
+Enable Privileged Access Never support.
+@item lor
+Enable Limited Ordering Regions support.
+@item rdma
+Enable ARMv8.1 Advanced SIMD instructions. This implies Advanced SIMD
+is enabled.
@end table
@node Adapteva Epiphany Options
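As a usage example (not part of the patch), the architecture value and feature modifiers documented above combine as follows, with the right-most modifier winning on conflicts:

  gcc -O2 -march=armv8.1-a file.c          # all ARMv8.1-A features, including the LSE atomics
  gcc -O2 -march=armv8-a+lse file.c        # base ARMv8-A plus only the atomic instructions
  gcc -O2 -march=armv8.1-a+nolse file.c    # ARMv8.1-A with the atomic instructions disabled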
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c
new file mode 100644
index 00000000000..f40af62bc1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-cas.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */
+
+/* Test ARMv8.1-A CAS instruction. */
+
+#include "atomic-inst-ops.inc"
+
+#define TEST TEST_TWO
+
+#define CAS_ATOMIC(FN, TY, MODEL1, MODEL2) \
+ int FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \
+ { \
+ int model_s = MODEL1; \
+ int model_f = MODEL2; \
+ /* The success memory ordering must be at least as strong as \
+ the failure memory ordering. */ \
+ if (model_s < model_f) \
+ return 0; \
+ /* Ignore invalid memory orderings. */ \
+ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \
+ return 0; \
+ return __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \
+ }
+
+#define CAS_ATOMIC_NORETURN(FN, TY, MODEL1, MODEL2) \
+ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \
+ { \
+ int model_s = MODEL1; \
+ int model_f = MODEL2; \
+ /* The success memory ordering must be at least as strong as \
+ the failure memory ordering. */ \
+ if (model_s < model_f) \
+ return; \
+ /* Ignore invalid memory orderings. */ \
+ if (model_f == __ATOMIC_RELEASE || model_f == __ATOMIC_ACQ_REL) \
+ return; \
+ __atomic_compare_exchange_n (val, foo, bar, 0, model_s, model_f); \
+ }
+
+TEST (cas_atomic, CAS_ATOMIC)
+TEST (cas_atomic_noreturn, CAS_ATOMIC_NORETURN)
+
+
+/* { dg-final { scan-assembler-times "casb\t" 4} } */
+/* { dg-final { scan-assembler-times "casab\t" 20} } */
+/* { dg-final { scan-assembler-times "caslb\t" 4} } */
+/* { dg-final { scan-assembler-times "casalb\t" 36} } */
+
+/* { dg-final { scan-assembler-times "cash\t" 4} } */
+/* { dg-final { scan-assembler-times "casah\t" 20} } */
+/* { dg-final { scan-assembler-times "caslh\t" 4} } */
+/* { dg-final { scan-assembler-times "casalh\t" 36} } */
+
+/* { dg-final { scan-assembler-times "cas\t" 8} } */
+/* { dg-final { scan-assembler-times "casa\t" 40} } */
+/* { dg-final { scan-assembler-times "casl\t" 8} } */
+/* { dg-final { scan-assembler-times "casal\t" 72} } */
+
+/* { dg-final { scan-assembler-not "ldaxr\t" } } */
+/* { dg-final { scan-assembler-not "stlxr\t" } } */
+/* { dg-final { scan-assembler-not "dmb" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
new file mode 100644
index 00000000000..875c006f8cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */
+
+/* Test ARMv8.1-A Load-ADD instruction. */
+
+#include "atomic-inst-ops.inc"
+
+#define TEST TEST_ONE
+
+#define LOAD_ADD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_fetch_add (val, foo, MODEL); \
+ }
+
+#define LOAD_ADD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_fetch_add (val, foo, MODEL); \
+ }
+
+#define LOAD_SUB(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_fetch_sub (val, foo, MODEL); \
+ }
+
+#define LOAD_SUB_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_fetch_sub (val, foo, MODEL); \
+ }
+
+#define ADD_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_add_fetch (val, foo, MODEL); \
+ }
+
+#define ADD_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_add_fetch (val, foo, MODEL); \
+ }
+
+#define SUB_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_sub_fetch (val, foo, MODEL); \
+ }
+
+#define SUB_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_sub_fetch (val, foo, MODEL); \
+ }
+
+TEST (load_add, LOAD_ADD)
+TEST (load_add_notreturn, LOAD_ADD_NORETURN)
+
+TEST (load_sub, LOAD_SUB)
+TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
+
+TEST (add_load, ADD_LOAD)
+TEST (add_load_notreturn, ADD_LOAD_NORETURN)
+
+TEST (sub_load, SUB_LOAD)
+TEST (sub_load_notreturn, SUB_LOAD_NORETURN)
+
+/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddab\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */
+
+/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddah\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */
+
+/* { dg-final { scan-assembler-times "ldadd\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadda\t" 64} } */
+/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddal\t" 64} } */
+
+/* { dg-final { scan-assembler-not "ldaxr\t" } } */
+/* { dg-final { scan-assembler-not "stlxr\t" } } */
+/* { dg-final { scan-assembler-not "dmb" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
new file mode 100644
index 00000000000..4246121f971
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
@@ -0,0 +1,155 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */
+
+/* Test ARMv8.1-A LD<logic-op> instruction. */
+
+#include "atomic-inst-ops.inc"
+
+#define TEST TEST_ONE
+
+#define LOAD_OR(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_fetch_or (val, foo, MODEL); \
+ }
+
+#define LOAD_OR_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_fetch_or (val, foo, MODEL); \
+ }
+
+#define LOAD_AND(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_fetch_and (val, foo, MODEL); \
+ }
+
+#define LOAD_AND_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_fetch_and (val, foo, MODEL); \
+ }
+
+#define LOAD_XOR(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_fetch_xor (val, foo, MODEL); \
+ }
+
+#define LOAD_XOR_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_fetch_xor (val, foo, MODEL); \
+ }
+
+#define OR_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_or_fetch (val, foo, MODEL); \
+ }
+
+#define OR_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_or_fetch (val, foo, MODEL); \
+ }
+
+#define AND_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_and_fetch (val, foo, MODEL); \
+ }
+
+#define AND_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_and_fetch (val, foo, MODEL); \
+ }
+
+#define XOR_LOAD(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ return __atomic_xor_fetch (val, foo, MODEL); \
+ }
+
+#define XOR_LOAD_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo) \
+ { \
+ __atomic_xor_fetch (val, foo, MODEL); \
+ }
+
+
+TEST (load_or, LOAD_OR)
+TEST (load_or_notreturn, LOAD_OR_NORETURN)
+
+TEST (load_and, LOAD_AND)
+TEST (load_and_notreturn, LOAD_AND_NORETURN)
+
+TEST (load_xor, LOAD_XOR)
+TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
+
+TEST (or_load, OR_LOAD)
+TEST (or_load_notreturn, OR_LOAD_NORETURN)
+
+TEST (and_load, AND_LOAD)
+TEST (and_load_notreturn, AND_LOAD_NORETURN)
+
+TEST (xor_load, XOR_LOAD)
+TEST (xor_load_notreturn, XOR_LOAD_NORETURN)
+
+
+/* Load-OR. */
+
+/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldseth\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldset\t" 16} } */
+/* { dg-final { scan-assembler-times "ldseta\t" 32} } */
+/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetal\t" 32} } */
+
+/* Load-AND. */
+
+/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralb\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclralh\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldclr\t" 16} */
+/* { dg-final { scan-assembler-times "ldclra\t" 32} } */
+/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclral\t" 32} } */
+
+/* Load-XOR. */
+
+/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorab\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorah\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */
+
+/* { dg-final { scan-assembler-times "ldeor\t" 16} */
+/* { dg-final { scan-assembler-times "ldeora\t" 32} } */
+/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeoral\t" 32} } */
+
+/* { dg-final { scan-assembler-not "ldaxr\t" } } */
+/* { dg-final { scan-assembler-not "stlxr\t" } } */
+/* { dg-final { scan-assembler-not "dmb" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc
new file mode 100644
index 00000000000..c2fdcba7930
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ops.inc
@@ -0,0 +1,66 @@
+/* Support code for atomic instruction tests. */
+
+/* Define type names without spaces. */
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef long long longlong;
+typedef unsigned long long ulonglong;
+typedef __int128_t int128;
+typedef __uint128_t uint128;
+
+#define FNNAME(NAME,TY) NAME
+
+/* Expand one-model functions. */
+#define TEST_M1(NAME, FN, TY, MODEL, DUMMY) \
+ FN (test_##NAME##_##TY, TY, MODEL)
+
+/* Expand two-model functions. */
+#define TEST_M2(NAME, FN, TY, MODEL1, MODEL2) \
+ FN (test_##NAME##_##TY, TY, MODEL1, MODEL2)
+
+/* Types to test. */
+#define TEST_TY(NAME, FN, N, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, char, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, uchar, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, short, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, ushort, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, int, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, uint, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, longlong, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, ulonglong, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, int128, MODEL1, MODEL2) \
+ TEST_M##N (NAME, FN, uint128, MODEL1, MODEL2)
+
+/* Models to test. */
+#define TEST_MODEL(NAME, FN, N) \
+ TEST_TY (NAME##_relaxed, FN, N, __ATOMIC_RELAXED, DUMMY) \
+ TEST_TY (NAME##_consume, FN, N, __ATOMIC_CONSUME, DUMMY) \
+ TEST_TY (NAME##_acquire, FN, N, __ATOMIC_ACQUIRE, DUMMY) \
+ TEST_TY (NAME##_release, FN, N, __ATOMIC_RELEASE, DUMMY) \
+ TEST_TY (NAME##_acq_rel, FN, N, __ATOMIC_ACQ_REL, DUMMY) \
+ TEST_TY (NAME##_seq_cst, FN, N, __ATOMIC_SEQ_CST, DUMMY) \
+
+/* Cross-product of models to test. */
+#define TEST_MODEL_M1(NAME, FN, N, M) \
+ TEST_TY (NAME##_relaxed, FN, N, M, __ATOMIC_RELAXED) \
+ TEST_TY (NAME##_consume, FN, N, M, __ATOMIC_CONSUME) \
+ TEST_TY (NAME##_acquire, FN, N, M, __ATOMIC_ACQUIRE) \
+ TEST_TY (NAME##_release, FN, N, M, __ATOMIC_RELEASE) \
+ TEST_TY (NAME##_acq_rel, FN, N, M, __ATOMIC_ACQ_REL) \
+ TEST_TY (NAME##_seq_cst, FN, N, M, __ATOMIC_SEQ_CST) \
+
+#define TEST_MODEL_M2(NAME, FN) \
+ TEST_MODEL_M1 (NAME##_relaxed, FN, 2, __ATOMIC_RELAXED) \
+ TEST_MODEL_M1 (NAME##_consume, FN, 2, __ATOMIC_CONSUME) \
+ TEST_MODEL_M1 (NAME##_acquire, FN, 2, __ATOMIC_ACQUIRE) \
+ TEST_MODEL_M1 (NAME##_release, FN, 2, __ATOMIC_RELEASE) \
+ TEST_MODEL_M1 (NAME##_acq_rel, FN, 2, __ATOMIC_ACQ_REL) \
+ TEST_MODEL_M1 (NAME##_seq_cst, FN, 2, __ATOMIC_SEQ_CST) \
+
+/* Expand functions for a cross-product of memory models and types. */
+#define TEST_TWO(NAME, FN) TEST_MODEL_M2 (NAME, FN)
+
+/* Expand functions for a set of memory models and types. */
+#define TEST_ONE(NAME, FN) TEST_MODEL (NAME, FN, 1)
+
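To make the macro layers above concrete, here is one hand-written expansion (illustrative, not generated output) of what TEST_ONE produces. TEST (load_add, LOAD_ADD) in atomic-inst-ldadd.c runs TEST_MODEL, which applies TEST_TY to each of the six memory models, and TEST_TY stamps out TEST_M1 for each of the ten types, so a single TEST line yields sixty functions like:

/* Expansion of TEST_M1 (load_add_acquire, LOAD_ADD, uint, __ATOMIC_ACQUIRE, DUMMY),
   written out by hand; uint is the typedef defined at the top of this file.  */
uint
test_load_add_acquire_uint (uint* val, uint* foo)
{
  return __atomic_fetch_add (val, foo, __ATOMIC_ACQUIRE);
}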
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c
new file mode 100644
index 00000000000..c42e997bdf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-swp.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+lse -fno-ipa-icf" } */
+
+/* Test ARMv8.1-A SWP instruction. */
+
+#include "atomic-inst-ops.inc"
+
+#define TEST TEST_ONE
+
+#define SWAP_ATOMIC(FN, TY, MODEL) \
+ TY FNNAME (FN, TY) (TY* val, TY foo) \
+ { \
+ return __atomic_exchange_n (val, foo, MODEL); \
+ }
+
+#define SWAP_ATOMIC_NORETURN(FN, TY, MODEL) \
+ void FNNAME (FN, TY) (TY* val, TY* foo, TY* bar) \
+ { \
+ __atomic_exchange (val, foo, bar, MODEL); \
+ }
+
+
+TEST (swap_atomic, SWAP_ATOMIC)
+TEST (swap_atomic_noreturn, SWAP_ATOMIC_NORETURN)
+
+
+/* { dg-final { scan-assembler-times "swpb\t" 4} } */
+/* { dg-final { scan-assembler-times "swpab\t" 8} } */
+/* { dg-final { scan-assembler-times "swplb\t" 4} } */
+/* { dg-final { scan-assembler-times "swpalb\t" 8} } */
+
+/* { dg-final { scan-assembler-times "swph\t" 4} } */
+/* { dg-final { scan-assembler-times "swpah\t" 8} } */
+/* { dg-final { scan-assembler-times "swplh\t" 4} } */
+/* { dg-final { scan-assembler-times "swpalh\t" 8} } */
+
+/* { dg-final { scan-assembler-times "swp\t" 8} } */
+/* { dg-final { scan-assembler-times "swpa\t" 16} } */
+/* { dg-final { scan-assembler-times "swpl\t" 8} } */
+/* { dg-final { scan-assembler-times "swpal\t" 16} } */
+
+/* { dg-final { scan-assembler-not "ldaxr\t" } } */
+/* { dg-final { scan-assembler-not "stlxr\t" } } */
+/* { dg-final { scan-assembler-not "dmb" } } */