aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Baylis <charles.baylis@linaro.org>2016-05-16 14:58:58 +0100
committerCharles Baylis <charles.baylis@linaro.org>2016-05-16 14:58:58 +0100
commitaf6bca9a331211e7f818267f6928ed600d453115 (patch)
tree8e40f1e6dbd710753285274b0dbe908f6d809a2d
parent039408ab5f7025689fee115c70d188d495f3f56a (diff)
-rw-r--r--src/lj_asm_arm64.h223
-rw-r--r--src/lj_emit_arm64.h77
-rw-r--r--src/lj_target_arm64.h34
-rw-r--r--src/luaconf.h3
4 files changed, 310 insertions, 27 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 286e723..17614ef 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -8,8 +8,14 @@
/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
- lua_unimpl();
- return 0;
+ Reg r = IR(ref)->r;
+ if (ra_noreg(r)) {
+ if (!ra_hashint(r) && !iscrossref(as, ref))
+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+ r = ra_allocref(as, ref, allow);
+ }
+ ra_noweak(as, r);
+ return r;
}
/* Allocate a scratch register pair. */
@@ -65,9 +71,20 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
}
/* Emit conditional branch to exit for guard. */
-static void asm_guardcc(ASMState *as, /*ARMCC*/ int cc)
-{
- lua_unimpl();
+static void asm_guardcc(ASMState *as, A64CC cc)
+{
+ MCode *target = exitstub_addr(as->J, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = A64I_BL | ((target-p) & 0x03ffffffu);
+ emit_branch(as, A64F_B_CC(A64I_B, cc^1), p+1);
+ return;
+ }
+ /* ARM64 doesn't have conditional BL, so we emit an unconditional BL
+ and branch around it with the opposite condition. */
+ emit_branch(as, A64I_BL, target);
+ emit_branch(as, A64F_B_CC(A64I_B, cc^1), p+1);
}
/* -- Operand fusion ------------------------------------------------------ */
@@ -93,39 +110,76 @@ static int32_t asm_fuseabase(ASMState *as, IRRef ref)
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
int lim)
{
- lua_unimpl();
- return 0;
+ /* !!!TODO: unclear what this function does; the LDRD comment below looks wrong -- ARM64 has no LDRD. */
+ lua_todo();
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r)) {
+ if (ir->o == IR_AREF) {
+ if (mayfuse(as, ref)) {
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ ofs += 8*IR(ir->op2)->i;
+ if (ofs > -lim && ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, refa, allow);
+ }
+ }
+ }
+ } else if (ir->o == IR_HREFK) {
+ if (mayfuse(as, ref)) {
+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+ if (ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, ir->op1, allow);
+ }
+ }
+ } else if (ir->o == IR_UREFC) {
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+ *ofsp = (ofs & 255); /* Mask out low bits to allow LDRD (ARM32 legacy; needs rework for A64). */
+ return ra_allock(as, (ofs & ~255), allow);
+ }
+ }
+ }
+ *ofsp = 0;
+ return ra_alloc1(as, ref, allow);
}
/* Fuse m operand into arithmetic/logic instructions. */
-static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow)
+static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
IRIns *ir = IR(ref);
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
- return ARMF_M(ir->r);
+ return A64F_M(ir->r);
} else if (irref_isk(ref)) {
uint32_t k = emit_isk12(ai, ir->i);
- if (k)
+ if (k == -1)
return k;
} else if (mayfuse(as, ref)) {
+#if 0
+ /* !!!TODO fuse shifts into this instruction, as ARM does */
if (ir->o >= IR_BSHL && ir->o <= IR_BROR) {
Reg m = ra_alloc1(as, ir->op1, allow);
ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL :
ir->o == IR_BSHR ? ARMSH_LSR :
ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR;
if (irref_isk(ir->op2)) {
- return m | ARMF_SH(sh, (IR(ir->op2)->i & 31));
+ return A64F_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31));
} else {
Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m));
- return m | ARMF_RSH(sh, s);
+ return A64F_M(m) | ARMF_RSH(sh, s);
}
} else if (ir->o == IR_ADD && ir->op1 == ir->op2) {
Reg m = ra_alloc1(as, ir->op1, allow);
- return m | ARMF_SH(ARMSH_LSL, 1);
+ return A64F_M(m) | ARMF_SH(ARMSH_LSL, 1);
}
+#endif
}
- return ra_allocref(as, ref, allow);
+ return A64F_M(ra_allocref(as, ref, allow));
}
/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
@@ -145,7 +199,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
{
- lua_unimpl();
+ lua_todo();
return 0;
}
@@ -230,7 +284,45 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
static void asm_hrefk(ASMState *as, IRIns *ir)
{
- lua_unimpl();
+ IRIns *kslot = IR(ir->op2);
+ IRIns *irkey = IR(kslot->op1);
+printf("%p\n",irkey->ptr.ptr64);
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+ Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+ lua_assert(ofs % sizeof(Node) == 0);
+ /* !!!TODO check 4095 for AArch64 */
+ if (ofs > 4095) {
+ idx = dest;
+ rset_clear(allow, dest);
+ kofs = (int32_t)offsetof(Node, key);
+ } else if (ra_hasreg(dest)) {
+ emit_opk(as, A64I_ADDx, dest, node, ofs, allow); /*!!!TODO w or x */
+ }
+ asm_guardcc(as, CC_NE);
+ if (!irt_ispri(irkey->t)) {
+ key = ra_scratch(as, allow);
+ rset_clear(allow, key);
+ }
+ rset_clear(allow, type);
+ if (irt_isnum(irkey->t)) {
+ emit_ccmpk(as, A64I_CCMPw, CC_EQ, 0, type,
+ (int32_t)ir_knum(irkey)->u32.hi, allow);
+ emit_opk(as, A64I_CMPw, 0, key,
+ (int32_t)ir_knum(irkey)->u32.lo, allow);
+ } else {
+printf("%p\n",irkey->ptr.ptr64);
+ emit_ccmpk(as, A64I_CCMPw, CC_EQ, 0, type,
+ (int32_t)ir_knum(irkey)->u32.hi, allow);
+ emit_opk(as, A64I_CMNx, 0, type, -irt_toitype(irkey->t), allow);
+ }
+ emit_lso(as, A64I_LDRw, type, idx, kofs+4); /* !!!TODO w or x */
+ if (ra_hasreg(key)) emit_lso(as, A64I_LDRw, key, idx, kofs); /* !!!TODO w or x */
+ if (ofs > 4095)
+ emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
}
static void asm_uref(ASMState *as, IRIns *ir)
@@ -277,7 +369,19 @@ static A64Ins asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
- lua_unimpl();
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ A64Ins ai = asm_fxloadins(ir);
+ int32_t ofs;
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+ emit_dn(as, (A64I_ADDx^A64I_BINOPk)|ofs, dest, idx);
+ return;
+ }
+ }
+ ofs = field_ofs[ir->op2];
+ emit_lso(as, ai, dest, idx, ofs);
}
static void asm_fstore(ASMState *as, IRIns *ir)
@@ -304,7 +408,27 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
static void asm_ahustore(ASMState *as, IRIns *ir)
{
- lua_unimpl();
+ if (ir->r != RID_SINK) {
+ RegSet allow = RSET_GPR;
+ Reg idx, src = RID_NONE, type = RID_NONE;
+ int32_t ofs = 0;
+ if (irt_isnum(ir->t)) {
+ src = ra_alloc1(as, ir->op2, RSET_FPR);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); /* !!!TODO what is 1024 */
+ emit_lso(as, A64I_STRd, src, idx, ofs);
+ } else
+ {
+ if (!irt_ispri(ir->t)) {
+ src = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, src);
+ }
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);
+ if (ra_hasreg(src))
+ emit_lso(as, A64I_STRw, src, idx, ofs); /* !!!TODO STRx? */
+ emit_lso(as, A64I_STRw, type, idx, ofs+4); /* !!!TODO STRx? */
+ }
+ }
}
static void asm_sload(ASMState *as, IRIns *ir)
@@ -327,7 +451,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
static void asm_tbar(ASMState *as, IRIns *ir)
{
- lua_unimpl();
+ lua_todo();
}
static void asm_obar(ASMState *as, IRIns *ir)
@@ -361,18 +485,55 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
{
- lua_unimpl();
- return 0;
+ IRIns *ir;
+ if (irref_isk(rref))
+ return 0; /* Don't swap constants to the left. */
+ if (irref_isk(lref))
+ return 1; /* But swap constants to the right. */
+ ir = IR(rref);
+ /* !!!TODO check for AArch64 fuseable ops here instead */
+#if 0
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
+ return 0; /* Don't swap fusable operands to the left. */
+ ir = IR(lref);
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
+ return 1; /* But swap fusable operands to the right. */
+#endif
+ return 0; /* Otherwise don't swap. */
}
static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
{
- lua_unimpl();
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m;
+ /* !!!TODO AArch64 doesn't have RSB, so swapping is harder than ARM */
+#if 0
+ if (asm_swapops(as, lref, rref)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ if ((ai & ~A64I_S) == A64I_SUB || (ai & ~A64I_S) == A64I_SBC)
+ ai ^= (A64I_SUB^A64I_RSB);
+ }
+#endif
+ left = ra_hintalloc(as, lref, dest, RSET_GPR);
+ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+ if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
+ asm_guardcc(as, CC_VS);
+ ai |= A64I_S;
+ }
+ emit_dn(as, ai^m, dest, left);
}
static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
{
- lua_unimpl();
+ if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
+ as->flagmcp = NULL;
+ as->mcp++;
+ ai |= A64I_S;
+ }
+ asm_intop(as, ir, ai);
}
static void asm_intneg(ASMState *as, IRIns *ir, A64Ins ai)
@@ -388,7 +549,12 @@ static void asm_intmul(ASMState *as, IRIns *ir)
static void asm_add(ASMState *as, IRIns *ir)
{
- lua_unimpl();
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
+ asm_fparith(as, ir, A64I_ADDd);
+ return;
+ }
+ asm_intop_s(as, ir, A64I_ADDx);
}
static void asm_sub(ASMState *as, IRIns *ir)
@@ -580,7 +746,14 @@ static void asm_gc_check(ASMState *as)
/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
- lua_unimpl();
+ MCode *p = as->mctop;
+ MCode *target = as->mcp;
+ if (as->loopinv) { /* Inverted loop branch? */
+ /* asm_guardcc already inverted the bcc and patched the final bl. */
+ p[-2] |= ((uint32_t)(target-p+2) & 0x00ffffffu);
+ } else {
+ p[-1] = A64I_B | ((uint32_t)((target-p)+1) & 0x03ffffffu);
+ }
}
/* -- Head of trace ------------------------------------------------------- */
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index f04a79f..3501f31 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -3,6 +3,8 @@
** Copyright !!!TODO
*/
+static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
+
/* Load a 32 bit constant into a GPR. */
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
@@ -19,6 +21,13 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
#define emit_getgl(as, r, field) lua_unimpl()
+/* Encode constant in K12 format for data processing instructions. */
+static uint32_t emit_isk12(A64Ins ai, int32_t n)
+{
+ /* !!!TODO implement this! */
+ return -1;
+}
+
/* mov r, imm64 or shorter 32 bit extended load. */
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
@@ -49,6 +58,74 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
lua_unimpl();
}
+static void emit_n(ASMState *as, A64Ins ai, Reg rn)
+{
+ *--as->mcp = ai | A64F_N(rn);
+}
+
+static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
+}
+
+static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
+}
+
+/* Emit an arithmetic/logic operation with a constant operand. */
+static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
+ int32_t i, RegSet allow)
+{
+ uint32_t k = emit_isk12(ai, i);
+ if (k != -1)
+ emit_dn(as, ai^k, dest, src);
+ else
+ emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
+}
+
+static void emit_ccmpr(ASMState *as, A64Ins ai, A64CC cond, int32_t nzcv, Reg
+rn, Reg rm)
+{
+ *--as->mcp = ai | A64F_N(rn) | A64F_M(rm) | A64F_NZCV(nzcv) | A64F_COND(cond);
+}
+
+static void emit_ccmpk(ASMState *as, A64Ins ai, A64CC cond, int32_t nzcv, Reg
+rn, int32_t k, RegSet allow)
+{
+ if (k >=0 && k <= 31)
+ *--as->mcp =
+ ai | A64F_N (rn) | A64F_M (k) | A64F_NZCV (nzcv) | A64F_COND (cond);
+ else
+ {
+ emit_ccmpr(as, ai, cond, nzcv, rn, ra_allock(as, k, allow));
+ }
+}
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int32_t ofs)
+{
+ /* !!!TODO ARM emit_lso combines LDR/STR pairs into LDRD/STRD, something
+ similar possible here? */
+ /* !!!TODO support STUR encodings, these ranges don't match emit_arm64... */
+ lua_assert(ofs >= 0 && ofs <= 4096 && (ofs&3) == 0);
+ //if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A(ofs >> 3);
+}
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = target - p;
+ lua_assert(((delta + 0x02000000) >> 26) == 0);
+ *--p = ai | ((uint32_t)delta & 0x00ffffffu);
+ as->mcp = p;
+}
+
+
#define emit_jmp(as, target) lua_unimpl()
#define emit_setvmstate(as, i) UNUSED(i)
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index aabc068..7fd2c49 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -140,19 +140,49 @@ typedef enum A64CC {
#define A64F_U16(x) ((x) << 5)
#define A64F_S26(x) (x)
#define A64F_S19(x) ((x) << 5)
+#define A64F_COND(cc) ((cc) << 12) /* for CCMP */
+#define A64F_NZCV(nzcv) ((nzcv) << 0) /* for CCMP */
+
+#define A64F_B_CC(insn, cc) (insn ^ cc)
typedef enum A64Ins {
+ A64I_S = 0x20000000,
A64I_MOVZw = 0x52800000,
A64I_MOVZx = 0xd2800000,
+ A64I_LDRw = 0xb9400000,
+ A64I_LDRx = 0xf9400000,
A64I_LDRLw = 0x18000000,
A64I_LDRLx = 0x58000000,
A64I_STR = 0xf9000000,
A64I_NOP = 0xd503201f,
+ A64I_ADDw = 0x0b000000,
+ A64I_ADDx = 0x8b000000,
+ A64I_ADDSw = 0x0b000000 | A64I_S,
+ A64I_ADDSx = 0x8b000000 | A64I_S,
A64I_B = 0x14000000,
A64I_BL = 0x94000000,
A64I_BR = 0xd61f0000,
- A64I_SUBSx = 0xcb000000,
- A64I_CMPx = 0xcb000000 | A64_D (RID_ZERO),
+ A64I_CCMPw = 0x7a400000, /* ccmp w0,w0,#0,eq */
+ A64I_CCMPx = 0xfa400000, /* ccmp x0,x0,#0,eq */
+ A64I_STRw = 0xb9000000, /* str w0,[x0] */
+ A64I_STRx = 0xf9000000, /* str x0,[x0] */
+ A64I_SUBx = 0xcb000000,
+ A64I_SUBw = 0x4b000000,
+ A64I_SUBSx = A64I_SUBx | A64I_S,
+ A64I_SUBSw = A64I_SUBw | A64I_S,
+
+ /* FP */
+ A64I_ADDd = 0x5ee08400,
+ A64I_FMADDd = 0x1f400000,
+ A64I_STRd = 0xfd000000, /* str d0,[x0] */
+
+ /* assembler aliases */
+ A64I_CMPw = A64I_SUBSw | A64F_D (RID_ZERO),
+ A64I_CMPx = A64I_SUBSx | A64F_D (RID_ZERO),
+ A64I_CMNx = A64I_ADDSx | A64F_D (RID_ZERO),
+
+ /* fields */
+ A64I_BINOPk = 0x1a000000, /* A64I_ADDx^A64I_BINOPk => ADD x0,x0,0 */
} A64Ins;
#endif
diff --git a/src/luaconf.h b/src/luaconf.h
index aa466f1..0b6b151 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -156,4 +156,7 @@
#define lua_unimpl() \
do { extern int printf(const char*,...); printf("Unimplemented: %s\n", __FUNCTION__); __builtin_abort(); } while (0)
+#define lua_todo() \
+ do { extern int printf(const char*,...); printf("todo: %s\n", __FUNCTION__); } while (0)
+
#endif