diff options
author | Charles Baylis <charles.baylis@linaro.org> | 2016-05-16 14:58:58 +0100 |
---|---|---|
committer | Charles Baylis <charles.baylis@linaro.org> | 2016-05-16 14:58:58 +0100 |
commit | af6bca9a331211e7f818267f6928ed600d453115 (patch) | |
tree | 8e40f1e6dbd710753285274b0dbe908f6d809a2d | |
parent | 039408ab5f7025689fee115c70d188d495f3f56a (diff) |
More stuff (old/aarch64-v2.1)
-rw-r--r-- | src/lj_asm_arm64.h | 223 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 77 | ||||
-rw-r--r-- | src/lj_target_arm64.h | 34 | ||||
-rw-r--r-- | src/luaconf.h | 3 |
4 files changed, 310 insertions, 27 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 286e723..17614ef 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -8,8 +8,14 @@ /* Allocate a register with a hint. */ static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) { - lua_unimpl(); - return 0; + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; } /* Allocate a scratch register pair. */ @@ -65,9 +71,20 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) } /* Emit conditional branch to exit for guard. */ -static void asm_guardcc(ASMState *as, /*ARMCC*/ int cc) -{ - lua_unimpl(); +static void asm_guardcc(ASMState *as, A64CC cc) +{ + MCode *target = exitstub_addr(as->J, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_BL | ((target-p) & 0x03ffffffu); + emit_branch(as, A64F_B_CC(A64I_B, cc^1), p+1); + return; + } + /* ARM64 doesn't have conditional BL, so we emit an unconditional BL + and branch around it with the opposite condition */ + emit_branch(as, A64I_BL, target); + emit_branch(as, A64F_B_CC(A64I_B, cc^1), p+1); } /* -- Operand fusion ------------------------------------------------------ */ @@ -93,39 +110,76 @@ static int32_t asm_fuseabase(ASMState *as, IRRef ref) static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, int lim) { - lua_unimpl(); - return 0; + /* !!!TODO NFI what this does, the comment about LDRD below looks dodgy */ + lua_todo(); + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? 
tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (ofs > -lim && ofs < lim) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (ofs < lim) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); + *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ + return ra_allock(as, (ofs & ~255), allow); + } + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); } /* Fuse m operand into arithmetic/logic instructions. */ -static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) +static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); - return ARMF_M(ir->r); + return A64F_M(ir->r); } else if (irref_isk(ref)) { uint32_t k = emit_isk12(ai, ir->i); - if (k) + if (k == -1) return k; } else if (mayfuse(as, ref)) { +#if 0 + /* !!!TODO fuse shifts into this instruction, as ARM does */ if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { Reg m = ra_alloc1(as, ir->op1, allow); ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : ir->o == IR_BSHR ? ARMSH_LSR : ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR; if (irref_isk(ir->op2)) { - return m | ARMF_SH(sh, (IR(ir->op2)->i & 31)); + return A64F_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31)); } else { Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); - return m | ARMF_RSH(sh, s); + return A64F_M(m) | ARMF_RSH(sh, s); } } else if (ir->o == IR_ADD && ir->op1 == ir->op2) { Reg m = ra_alloc1(as, ir->op1, allow); - return m | ARMF_SH(ARMSH_LSL, 1); + return A64F_M(m) | ARMF_SH(ARMSH_LSL, 1); } +#endif } - return ra_allocref(as, ref, allow); + return A64F_M(ra_allocref(as, ref, allow)); } /* Fuse shifts into loads/stores. 
Only bother with BSHL 2 => lsl #2. */ @@ -145,7 +199,7 @@ static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, /* Fuse to multiply-add/sub instruction. */ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) { - lua_unimpl(); + lua_todo(); return 0; } @@ -230,7 +284,45 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) static void asm_hrefk(ASMState *as, IRIns *ir) { - lua_unimpl(); + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); +printf("%p\n",irkey->ptr.ptr64); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key = RID_NONE, type = RID_TMP, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); + lua_assert(ofs % sizeof(Node) == 0); + /* !!!TODO check 4095 for AArch64 */ + if (ofs > 4095) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_opk(as, A64I_ADDx, dest, node, ofs, allow); /*!!!TODO w or x */ + } + asm_guardcc(as, CC_NE); + if (!irt_ispri(irkey->t)) { + key = ra_scratch(as, allow); + rset_clear(allow, key); + } + rset_clear(allow, type); + if (irt_isnum(irkey->t)) { + emit_ccmpk(as, A64I_CCMPw, CC_EQ, 0, type, + (int32_t)ir_knum(irkey)->u32.hi, allow); + emit_opk(as, A64I_CMPw, 0, key, + (int32_t)ir_knum(irkey)->u32.lo, allow); + } else { +printf("%p\n",irkey->ptr.ptr64); + emit_ccmpk(as, A64I_CCMPw, CC_EQ, 0, type, + (int32_t)ir_knum(irkey)->u32.hi, allow); + emit_opk(as, A64I_CMNx, 0, type, -irt_toitype(irkey->t), allow); + } + emit_lso(as, A64I_LDRw, type, idx, kofs+4); /* !!!TODO w or x */ + if (ra_hasreg(key)) emit_lso(as, A64I_LDRw, key, idx, kofs); /* !!!TODO w or x */ + if (ofs > 4095) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); } static void asm_uref(ASMState *as, IRIns *ir) @@ -277,7 +369,19 @@ static A64Ins 
asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { - lua_unimpl(); + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); + A64Ins ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, (A64I_ADDx^A64I_BINOPk)|ofs, dest, idx); + return; + } + } + ofs = field_ofs[ir->op2]; + emit_lso(as, ai, dest, idx, ofs); } static void asm_fstore(ASMState *as, IRIns *ir) @@ -304,7 +408,27 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) static void asm_ahustore(ASMState *as, IRIns *ir) { - lua_unimpl(); + if (ir->r != RID_SINK) { + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); /* !!!TODO what is 1024 */ + emit_lso(as, A64I_STRd, src, idx, ofs); + } else + { + if (!irt_ispri(ir->t)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + } + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); + if (ra_hasreg(src)) + emit_lso(as, A64I_STRw, src, idx, ofs); /* !!!TODO STRx? */ + emit_lso(as, A64I_STRw, type, idx, ofs+4); /* !!!TODO STRx? */ + } + } } static void asm_sload(ASMState *as, IRIns *ir) @@ -327,7 +451,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) static void asm_tbar(ASMState *as, IRIns *ir) { - lua_unimpl(); + lua_todo(); } static void asm_obar(ASMState *as, IRIns *ir) @@ -361,18 +485,55 @@ static void asm_fpmath(ASMState *as, IRIns *ir) static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) { - lua_unimpl(); - return 0; + IRIns *ir; + if (irref_isk(rref)) + return 0; /* Don't swap constants to the left. */ + if (irref_isk(lref)) + return 1; /* But swap constants to the right. 
*/ + ir = IR(rref); + /* !!!TODO check for AArch64 fuseable ops here instead */ +#if 0 + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) + return 0; /* Don't swap fusable operands to the left. */ + ir = IR(lref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) + return 1; /* But swap fusable operands to the right. */ +#endif + return 0; /* Otherwise don't swap. */ } static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) { - lua_unimpl(); + IRRef lref = ir->op1, rref = ir->op2; + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + /* !!!TODO AArch64 doesn't have RSB, so swapping is harder than ARM */ +#if 0 + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if ((ai & ~A64I_S) == A64I_SUB || (ai & ~A64I_S) == A64I_SBC) + ai ^= (A64I_SUB^A64I_RSB); + } +#endif + left = ra_hintalloc(as, lref, dest, RSET_GPR); + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_VS); + ai |= A64I_S; + } + emit_dn(as, ai^m, dest, left); } static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) { - lua_unimpl(); + if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai |= A64I_S; + } + asm_intop(as, ir, ai); } static void asm_intneg(ASMState *as, IRIns *ir, A64Ins ai) @@ -388,7 +549,12 @@ static void asm_intmul(ASMState *as, IRIns *ir) static void asm_add(ASMState *as, IRIns *ir) { - lua_unimpl(); + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) + asm_fparith(as, ir, A64I_ADDd); + return; + } + asm_intop_s(as, ir, A64I_ADDx); } static void asm_sub(ASMState *as, IRIns *ir) @@ -580,7 +746,14 @@ static void asm_gc_check(ASMState *as) /* Fixup the loop branch. 
*/ static void asm_loop_fixup(ASMState *as) { - lua_unimpl(); + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->loopinv) { /* Inverted loop branch? */ + /* asm_guardcc already inverted the bcc and patched the final bl. */ + p[-2] |= ((uint32_t)(target-p+2) & 0x00ffffffu); + } else { + p[-1] = A64I_B | ((uint32_t)((target-p)+1) & 0x03ffffffu); + } } /* -- Head of trace ------------------------------------------------------- */ diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index f04a79f..3501f31 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -3,6 +3,8 @@ ** Copyright !!!TODO */ +static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); + /* Load a 32 bit constant into a GPR. */ static void emit_loadi(ASMState *as, Reg r, int32_t i) { @@ -19,6 +21,13 @@ static void emit_loadn(ASMState *as, Reg r, cTValue *tv) #define emit_getgl(as, r, field) lua_unimpl() +/* Encode constant in K12 format for data processing instructions. */ +static uint32_t emit_isk12(A64Ins ai, int32_t n) +{ + /* !!!TODO implement this! */ + return -1; +} + /* mov r, imm64 or shorter 32 bit extended load. */ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) { @@ -49,6 +58,74 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) lua_unimpl(); } +static void emit_n(ASMState *as, A64Ins ai, Reg rn) +{ + *--as->mcp = ai | A64F_N(rn); +} + +static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); +} + +static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); +} + +/* Emit an arithmetic/logic operation with a constant operand. 
*/ +static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, + int32_t i, RegSet allow) +{ + uint32_t k = emit_isk12(ai, i); + if (k != -1) + emit_dn(as, ai^k, dest, src); + else + emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); +} + +static void emit_ccmpr(ASMState *as, A64Ins ai, A64CC cond, int32_t nzcv, Reg +rn, Reg rm) +{ + *--as->mcp = ai | A64F_N(rn) | A64F_M(rm) | A64F_NZCV(nzcv) | A64F_COND(cond); +} + +static void emit_ccmpk(ASMState *as, A64Ins ai, A64CC cond, int32_t nzcv, Reg +rn, int32_t k, RegSet allow) +{ + if (k >=0 && k <= 31) + *--as->mcp = + ai | A64F_N (rn) | A64F_M (k) | A64F_NZCV (nzcv) | A64F_COND (cond); + else + { + emit_ccmpr(as, ai, cond, nzcv, rn, ra_allock(as, k, allow)); + } +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int32_t ofs) +{ + /* !!!TODO ARM emit_lso combines LDR/STR pairs into LDRD/STRD, something + similar possible here? */ + /* !!!TODO support STUR encodings, these ranges don't match emit_arm64... 
*/ + lua_assert(ofs >= 0 && ofs <= 4096 && (ofs&3) == 0); + //if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_A(ofs >> 3); +} + +/* -- Emit control-flow instructions -------------------------------------- */ + +static void emit_branch(ASMState *as, A64Ins ai, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = target - p; + lua_assert(((delta + 0x02000000) >> 26) == 0); + *--p = ai | ((uint32_t)delta & 0x00ffffffu); + as->mcp = p; +} + + #define emit_jmp(as, target) lua_unimpl() #define emit_setvmstate(as, i) UNUSED(i) diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index aabc068..7fd2c49 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -140,19 +140,49 @@ typedef enum A64CC { #define A64F_U16(x) ((x) << 5) #define A64F_S26(x) (x) #define A64F_S19(x) ((x) << 5) +#define A64F_COND(cc) ((cc) << 12) /* for CCMP */ +#define A64F_NZCV(nzcv) ((nzcv) << 0) /* for CCMP */ + +#define A64F_B_CC(insn, cc) (insn ^ cc) typedef enum A64Ins { + A64I_S = 0x20000000, A64I_MOVZw = 0x52800000, A64I_MOVZx = 0xd2800000, + A64I_LDRw = 0xb9400000, + A64I_LDRx = 0xf9400000, A64I_LDRLw = 0x18000000, A64I_LDRLx = 0x58000000, A64I_STR = 0xf9000000, A64I_NOP = 0xd503201f, + A64I_ADDw = 0x0b000000, + A64I_ADDx = 0x8b000000, + A64I_ADDSw = 0x0b000000 | A64I_S, + A64I_ADDSx = 0x8b000000 | A64I_S, A64I_B = 0x14000000, A64I_BL = 0x94000000, A64I_BR = 0xd61f0000, - A64I_SUBSx = 0xcb000000, - A64I_CMPx = 0xcb000000 | A64_D (RID_ZERO), + A64I_CCMPw = 0x7a400000, /* ccmp w0,w0,#0,eq */ + A64I_CCMPx = 0xfa400000, /* ccmp x0,x0,#0,eq */ + A64I_STRw = 0xb9000000, /* str w0,[x0] */ + A64I_STRx = 0xf9000000, /* str x0,[x0] */ + A64I_SUBx = 0xcb000000, + A64I_SUBw = 0x4b000000, + A64I_SUBSx = A64I_SUBx | A64I_S, + A64I_SUBSw = A64I_SUBw | A64I_S, + + /* FP */ + A64I_ADDd = 0x5ee08400, + A64I_FMADDd = 0x1f400000, + A64I_STRd = 0xfd000000, /* str d0,[x0] */ + + /* assembler aliases */ + A64I_CMPw = A64I_SUBSw | A64F_D 
(RID_ZERO), + A64I_CMPx = A64I_SUBSx | A64F_D (RID_ZERO), + A64I_CMNx = A64I_ADDSx | A64F_D (RID_ZERO), + + /* fields */ + A64I_BINOPk = 0x1a000000, /* A64I_ADDx^A64I_BINOPk => ADD x0,x0,0 */ } A64Ins; #endif diff --git a/src/luaconf.h b/src/luaconf.h index aa466f1..0b6b151 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -156,4 +156,7 @@ #define lua_unimpl() \ do { extern int printf(const char*,...); printf("Unimplemented: %s\n", __FUNCTION__); __builtin_abort(); } while (0) +#define lua_todo() \ + do { extern int printf(const char*,...); printf("todo: %s\n", __FUNCTION__); } while (0) + #endif |