-rw-r--r--  target/arm/helper-mve.h      5
-rw-r--r--  target/arm/mve.decode       10
-rw-r--r--  target/arm/mve_helper.c     99
-rw-r--r--  target/arm/translate-mve.c  66
4 files changed, 144 insertions, 36 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index df461e2000..02f7e99d6a 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -65,6 +65,11 @@ DEF_HELPER_FLAGS_4(mve_vstrh_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vstrw_sg_os_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vstrd_sg_os_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vldrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrw_sg_wb_uw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(mve_vstrd_sg_wb_ud, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_3(mve_vdup, TCG_CALL_NO_WG, void, env, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vidupb, TCG_CALL_NO_WG, i32, env, ptr, i32, i32)
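Each DEF_HELPER_FLAGS_4() line above declares a TCG helper with a fixed C
prototype; the four new entries follow the pattern of the existing
scatter-gather helpers and reuse the same TCG_CALL_NO_WG flag. As a sketch
(the helper_ name is assumed from QEMU's HELPER() naming convention, shown
here only for illustration), the first new declaration amounts to:

    void helper_mve_vldrw_sg_wb_uw(CPUARMState *env, void *vd, void *vm,
                                   uint32_t base);

where vd is the destination Qd, vm the register holding the offsets or
addresses (Qm), and base the 32-bit value supplied by the translator.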
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index b0e39f3672..76e9b9c721 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -43,6 +43,7 @@
&vmaxv qm rda size
&vabav qn qm rda size
&vldst_sg qd qm rn size msize os
+&vldst_sg_imm qd qm a w imm
# scatter-gather memory size is in bits 6:4
%sg_msize 6:1 4:1
@@ -54,6 +55,10 @@
@vldst_sg .... .... .... rn:4 .... ... size:2 ... ... os:1 &vldst_sg \
qd=%qd qm=%qm msize=%sg_msize
+# Qm is in the fields usually labeled Qn
+@vldst_sg_imm .... .... a:1 . w:1 . .... .... .... . imm:7 &vldst_sg_imm \
+ qd=%qd qm=%qn
+
@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
@1op_nosz .... .... .... .... .... .... .... .... &1op qd=%qd qm=%qm size=0
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
@@ -148,6 +153,11 @@ VLDR_S_sg 111 0 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
VLDR_U_sg 111 1 1100 1 . 01 .... ... 0 111 . .... .... @vldst_sg
VSTR_sg 111 0 1100 1 . 00 .... ... 0 111 . .... .... @vldst_sg
+VLDRW_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VLDRD_sg_imm 111 1 1101 ... 1 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+VSTRW_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1110 .... .... @vldst_sg_imm
+VSTRD_sg_imm 111 1 1101 ... 0 ... 0 ... 1 1111 .... .... @vldst_sg_imm
+
# Moves between 2 32-bit vector lanes and 2 general purpose registers
VMOV_to_2gp 1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
VMOV_from_2gp 1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
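As a reading aid for the new decode lines: decodetree turns the
&vldst_sg_imm argument-set declaration into a C argument struct and invokes
one trans_* function per matching pattern. A sketch of the generated
interface (exact layout and field order are up to the generator; this only
shows which fields reach the translator):

    typedef struct {
        int qd;
        int qm;
        int a;   /* add (1) or subtract (0) the scaled immediate */
        int w;   /* writeback form when 1 */
        int imm; /* 7-bit unscaled immediate */
    } arg_vldst_sg_imm;

    bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a);

How a, w and imm are consumed is visible in translate-mve.c below.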
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index d5ed2bbdea..821eb71a19 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -220,7 +220,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
* For loads, predicated lanes are zeroed instead of retaining
* their previous values.
*/
-#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN) \
+#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
@@ -237,25 +237,35 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
addr = ADDRFN(base, m[H##ESIZE(e)]); \
d[H##ESIZE(e)] = (mask & 1) ? \
cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+ if (WB) { \
+ m[H##ESIZE(e)] = addr; \
+ } \
} \
mve_advance_vpt(env); \
}
/* We know here TYPE is unsigned so always the same as the offset type */
-#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN) \
+#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
TYPE *d = vd; \
TYPE *m = vm; \
uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
- for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
addr = ADDRFN(base, m[H##ESIZE(e)]); \
if (mask & 1) { \
cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
} \
+ if (WB) { \
+ m[H##ESIZE(e)] = addr; \
+ } \
} \
mve_advance_vpt(env); \
}
@@ -265,8 +275,10 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
* accesses, controlled by the predicate mask for the relevant beat,
* and with a single 32-bit offset in the first of the two Qm elements.
* Note that for QEMU our IMPDEF AIRCR.ENDIANNESS is always 0 (little).
+ * Address writeback happens on the odd beats and updates the address
+ * stored in the even-beat element.
*/
-#define DO_VLDR64_SG(OP, ADDRFN) \
+#define DO_VLDR64_SG(OP, ADDRFN, WB) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
@@ -283,25 +295,35 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
addr = ADDRFN(base, m[H4(e & ~1)]); \
addr += 4 * (e & 1); \
d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \
+ if (WB && (e & 1)) { \
+ m[H4(e & ~1)] = addr - 4; \
+ } \
} \
mve_advance_vpt(env); \
}
-#define DO_VSTR64_SG(OP, ADDRFN) \
+#define DO_VSTR64_SG(OP, ADDRFN, WB) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
uint32_t *d = vd; \
uint32_t *m = vm; \
uint16_t mask = mve_element_mask(env); \
+ uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
- for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
+ if (!(eci_mask & 1)) { \
+ continue; \
+ } \
addr = ADDRFN(base, m[H4(e & ~1)]); \
addr += 4 * (e & 1); \
if (mask & 1) { \
cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \
} \
+ if (WB && (e & 1)) { \
+ m[H4(e & ~1)] = addr - 4; \
+ } \
} \
mve_advance_vpt(env); \
}
@@ -311,36 +333,41 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
#define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2))
#define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3))
-DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD)
-DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD)
-DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD)
-
-DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD)
-DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD)
-DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD)
-DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD)
-DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD)
-DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD)
-DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD)
-
-DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH)
-DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH)
-DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH)
-DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW)
-DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD)
-
-DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD)
-DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD)
-DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD)
-DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD)
-DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD)
-DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD)
-DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD)
-
-DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH)
-DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH)
-DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW)
-DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD)
+DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false)
+
+DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false)
+
+DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false)
+DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false)
+
+DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false)
+
+DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false)
+DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false)
+
+DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
+DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true)
+DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true)
+DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
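A quick cross-reference for the four writeback instantiations just added
(the mapping is taken from the fns[] tables in translate-mve.c below):

    /*
     * mve_vldrw_sg_wb_uw, mve_vldrd_sg_wb_ud, mve_vstrw_sg_wb_uw and
     * mve_vstrd_sg_wb_ud are selected when the W bit of the new
     * V{LD,ST}R{W,D}_sg_imm patterns is 1; with W == 0 the existing
     * non-writeback helpers (mve_vldrw_sg_uw etc.) are used instead.
     */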
/*
* The mergemask(D, R, M) macro performs the operation "*D = R" but
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 9f035d4b0b..af3c6a7d0d 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -300,6 +300,72 @@ static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
#undef F
+static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
+ MVEGenLdStSGFn *fn, unsigned msize)
+{
+ uint32_t offset;
+ TCGv_ptr qd, qm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qm) ||
+ !fn) {
+ return false;
+ }
+
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ offset = a->imm << msize;
+ if (!a->a) {
+ offset = -offset;
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ qm = mve_qreg_ptr(a->qm);
+ fn(cpu_env, qd, qm, tcg_constant_i32(offset));
+ tcg_temp_free_ptr(qd);
+ tcg_temp_free_ptr(qm);
+ mve_update_eci(s);
+ return true;
+}
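To illustrate the offset computation in do_ldst_sg_imm() (values chosen
arbitrarily; MO_32 == 2 and MO_64 == 3 in QEMU's MemOp encoding):

    /*
     * word form,       imm == 3, a == 1:  offset = 3 << MO_32 = 12
     * doubleword form, imm == 3, a == 0:  offset = 3 << MO_64 = 24,
     *                                     then negated to -24
     *
     * The resulting byte offset is passed to the helper as "base" and
     * added to each Qm element to form the access address; the writeback
     * helper variant (chosen via fns[a->w] below) also stores that
     * address back into Qm.
     */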
+
+static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vldrw_sg_uw,
+ gen_helper_mve_vldrw_sg_wb_uw,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
+}
+
+static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vldrd_sg_ud,
+ gen_helper_mve_vldrd_sg_wb_ud,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
+}
+
+static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vstrw_sg_uw,
+ gen_helper_mve_vstrw_sg_wb_uw,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
+}
+
+static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
+{
+ static MVEGenLdStSGFn * const fns[] = {
+ gen_helper_mve_vstrd_sg_ud,
+ gen_helper_mve_vstrd_sg_wb_ud,
+ };
+ return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
+}
+
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
TCGv_ptr qd;